letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +113 -0
  3. letta/adapters/letta_llm_stream_adapter.py +171 -0
  4. letta/agents/agent_loop.py +23 -0
  5. letta/agents/base_agent.py +4 -1
  6. letta/agents/base_agent_v2.py +68 -0
  7. letta/agents/helpers.py +3 -5
  8. letta/agents/letta_agent.py +23 -12
  9. letta/agents/letta_agent_v2.py +1221 -0
  10. letta/agents/voice_agent.py +2 -1
  11. letta/constants.py +1 -1
  12. letta/errors.py +12 -0
  13. letta/functions/function_sets/base.py +53 -12
  14. letta/functions/helpers.py +3 -2
  15. letta/functions/schema_generator.py +1 -1
  16. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  17. letta/groups/sleeptime_multi_agent_v3.py +233 -0
  18. letta/helpers/tool_rule_solver.py +4 -0
  19. letta/helpers/tpuf_client.py +607 -34
  20. letta/interfaces/anthropic_streaming_interface.py +74 -30
  21. letta/interfaces/openai_streaming_interface.py +80 -37
  22. letta/llm_api/google_vertex_client.py +1 -1
  23. letta/llm_api/openai_client.py +45 -4
  24. letta/orm/agent.py +4 -1
  25. letta/orm/block.py +2 -0
  26. letta/orm/blocks_agents.py +1 -0
  27. letta/orm/group.py +1 -0
  28. letta/orm/source.py +8 -1
  29. letta/orm/sources_agents.py +2 -1
  30. letta/orm/step_metrics.py +10 -0
  31. letta/orm/tools_agents.py +5 -2
  32. letta/schemas/block.py +4 -0
  33. letta/schemas/enums.py +1 -0
  34. letta/schemas/group.py +8 -0
  35. letta/schemas/letta_message.py +1 -1
  36. letta/schemas/letta_request.py +2 -2
  37. letta/schemas/mcp.py +9 -1
  38. letta/schemas/message.py +42 -2
  39. letta/schemas/providers/ollama.py +1 -1
  40. letta/schemas/providers.py +1 -2
  41. letta/schemas/source.py +6 -0
  42. letta/schemas/step_metrics.py +2 -0
  43. letta/server/rest_api/interface.py +34 -2
  44. letta/server/rest_api/json_parser.py +2 -0
  45. letta/server/rest_api/redis_stream_manager.py +2 -1
  46. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  47. letta/server/rest_api/routers/v1/__init__.py +2 -0
  48. letta/server/rest_api/routers/v1/agents.py +132 -170
  49. letta/server/rest_api/routers/v1/blocks.py +6 -0
  50. letta/server/rest_api/routers/v1/folders.py +25 -7
  51. letta/server/rest_api/routers/v1/groups.py +6 -0
  52. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  53. letta/server/rest_api/routers/v1/messages.py +14 -19
  54. letta/server/rest_api/routers/v1/runs.py +43 -28
  55. letta/server/rest_api/routers/v1/sources.py +25 -7
  56. letta/server/rest_api/routers/v1/tools.py +42 -0
  57. letta/server/rest_api/streaming_response.py +11 -2
  58. letta/server/server.py +9 -6
  59. letta/services/agent_manager.py +39 -59
  60. letta/services/agent_serialization_manager.py +26 -11
  61. letta/services/archive_manager.py +60 -9
  62. letta/services/block_manager.py +5 -0
  63. letta/services/file_processor/embedder/base_embedder.py +5 -0
  64. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  65. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  66. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  67. letta/services/file_processor/file_processor.py +9 -7
  68. letta/services/group_manager.py +74 -11
  69. letta/services/mcp_manager.py +134 -28
  70. letta/services/message_manager.py +229 -125
  71. letta/services/passage_manager.py +2 -1
  72. letta/services/source_manager.py +23 -1
  73. letta/services/summarizer/summarizer.py +4 -1
  74. letta/services/tool_executor/core_tool_executor.py +2 -120
  75. letta/services/tool_executor/files_tool_executor.py +133 -8
  76. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  77. letta/services/tool_sandbox/local_sandbox.py +2 -2
  78. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  79. letta/settings.py +6 -0
  80. letta/streaming_utils.py +29 -4
  81. letta/utils.py +106 -4
  82. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
  83. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
  84. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  85. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  86. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
letta/interfaces/anthropic_streaming_interface.py CHANGED
@@ -28,6 +28,7 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.schemas.letta_message import (
+    ApprovalRequestMessage,
     AssistantMessage,
     HiddenReasoningMessage,
     LettaMessage,
@@ -59,7 +60,12 @@ class AnthropicStreamingInterface:
     and detection of tool call events.
     """

-    def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
+    def __init__(
+        self,
+        use_assistant_message: bool = False,
+        put_inner_thoughts_in_kwarg: bool = False,
+        requires_approval_tools: list = [],
+    ):
         self.json_parser: JSONParser = PydanticJSONParser()
         self.use_assistant_message = use_assistant_message

@@ -90,6 +96,8 @@ class AnthropicStreamingInterface:
         # Buffer to handle partial XML tags across chunks
         self.partial_tag_buffer = ""

+        self.requires_approval_tools = requires_approval_tools
+
     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
         if not self.tool_call_name:
@@ -98,15 +106,19 @@
         try:
             tool_input = json.loads(self.accumulated_tool_call_args)
         except json.JSONDecodeError as e:
-            logger.warning(
-                f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
-                f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
-            )
-            raise
+            # Attempt to use OptimisticJSONParser to handle incomplete/malformed JSON
+            try:
+                tool_input = self.json_parser.parse(self.accumulated_tool_call_args)
+            except:
+                logger.warning(
+                    f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
+                    f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
+                )
+                raise e
         if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
             arguments = str(json.dumps(tool_input["function"]["arguments"], indent=2))
         else:
-            arguments = self.accumulated_tool_call_args
+            arguments = str(json.dumps(tool_input, indent=2))
         return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=arguments, name=self.tool_call_name))

     def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
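Note on the hunk above: strict json.loads is now tried first, with the interface's optimistic parser as a fallback for arguments that arrive truncated mid-stream. A minimal standalone sketch of the same fallback pattern; parse_partial is an invented stand-in for Letta's internal PydanticJSONParser, whose actual recovery behavior may differ:

import json

def parse_partial(text: str) -> dict:
    # Invented stand-in for an optimistic JSON parser: retry progressively
    # shorter prefixes with a few plausible closing suffixes appended.
    for end in range(len(text), 0, -1):
        for suffix in ("", "}", '"}', '"}}'):
            try:
                return json.loads(text[:end] + suffix)
            except json.JSONDecodeError:
                continue
    raise json.JSONDecodeError("unrecoverable", text, 0)

def load_tool_args(accumulated: str) -> dict:
    try:
        return json.loads(accumulated)      # strict parse first
    except json.JSONDecodeError:
        return parse_partial(accumulated)   # optimistic fallback

print(load_tool_args('{"city": "Berlin", "units": "metr'))  # {'city': 'Berlin', 'units': 'metr'}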
@@ -218,11 +230,12 @@
         except Exception as e:
             import traceback

-            logger.error("Error processing stream: %s", e, traceback.format_exc())
-            ttft_span.add_event(
-                name="stop_reason",
-                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
-            )
+            logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
+            if ttft_span:
+                ttft_span.add_event(
+                    name="stop_reason",
+                    attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+                )
             yield LettaStopReason(stop_reason=StopReasonType.error)
             raise e
         finally:
@@ -256,13 +269,15 @@
             self.inner_thoughts_complete = False

             if not self.use_assistant_message:
-                # Buffer the initial tool call message instead of yielding immediately
-                tool_call_msg = ToolCallMessage(
-                    id=self.letta_message_id,
-                    tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
-                    date=datetime.now(timezone.utc).isoformat(),
-                )
-                self.tool_call_buffer.append(tool_call_msg)
+                # Only buffer the initial tool call message if it doesn't require approval
+                # For approval-required tools, we'll create the ApprovalRequestMessage later
+                if self.tool_call_name not in self.requires_approval_tools:
+                    tool_call_msg = ToolCallMessage(
+                        id=self.letta_message_id,
+                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
+                        date=datetime.now(timezone.utc).isoformat(),
+                    )
+                    self.tool_call_buffer.append(tool_call_msg)
         elif isinstance(content, BetaThinkingBlock):
             self.anthropic_mode = EventMode.THINKING
             # TODO: Can capture signature, etc.
@@ -353,11 +368,36 @@
                     prev_message_type = reasoning_message.message_type
                     yield reasoning_message

-                # Check if inner thoughts are complete - if so, flush the buffer
+                # Check if inner thoughts are complete - if so, flush the buffer or create approval message
                 if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
                     self.inner_thoughts_complete = True
-                    # Flush all buffered tool call messages
-                    if len(self.tool_call_buffer) > 0:
+
+                    # Check if this tool requires approval
+                    if self.tool_call_name in self.requires_approval_tools:
+                        # Create ApprovalRequestMessage directly (buffer should be empty)
+                        if prev_message_type and prev_message_type != "approval_request_message":
+                            message_index += 1
+
+                        # Strip out inner thoughts from arguments
+                        tool_call_args = self.accumulated_tool_call_args
+                        if current_inner_thoughts:
+                            tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
+
+                        approval_msg = ApprovalRequestMessage(
+                            id=self.letta_message_id,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            date=datetime.now(timezone.utc).isoformat(),
+                            name=self.tool_call_name,
+                            tool_call=ToolCallDelta(
+                                name=self.tool_call_name,
+                                tool_call_id=self.tool_call_id,
+                                arguments=tool_call_args,
+                            ),
+                        )
+                        prev_message_type = approval_msg.message_type
+                        yield approval_msg
+                    elif len(self.tool_call_buffer) > 0:
+                        # Flush buffered tool call messages for non-approval tools
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1

@@ -371,9 +411,6 @@
                             id=self.tool_call_buffer[0].id,
                             otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
                             date=self.tool_call_buffer[0].date,
-                            name=self.tool_call_buffer[0].name,
-                            sender_id=self.tool_call_buffer[0].sender_id,
-                            step_id=self.tool_call_buffer[0].step_id,
                             tool_call=ToolCallDelta(
                                 name=self.tool_call_name,
                                 tool_call_id=self.tool_call_id,
@@ -404,11 +441,18 @@
                         yield assistant_msg
                 else:
                     # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
-                    tool_call_msg = ToolCallMessage(
-                        id=self.letta_message_id,
-                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
-                        date=datetime.now(timezone.utc).isoformat(),
-                    )
+                    if self.tool_call_name in self.requires_approval_tools:
+                        tool_call_msg = ApprovalRequestMessage(
+                            id=self.letta_message_id,
+                            tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                            date=datetime.now(timezone.utc).isoformat(),
+                        )
+                    else:
+                        tool_call_msg = ToolCallMessage(
+                            id=self.letta_message_id,
+                            tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                            date=datetime.now(timezone.utc).isoformat(),
+                        )
                     if self.inner_thoughts_complete:
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
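Taken together, these hunks make the streaming interface emit an ApprovalRequestMessage instead of a ToolCallMessage whenever the called tool is in requires_approval_tools. A rough sketch of how a caller might wire this up, assuming the interface's process() generator is consumed as in the agent loop; the tool name "send_payment" and the surrounding plumbing are invented for illustration:

# Hypothetical call site; approval-gated tool names would come from the
# agent's tool rules, and "send_payment" is an invented example.
interface = AnthropicStreamingInterface(
    use_assistant_message=False,
    put_inner_thoughts_in_kwarg=False,
    requires_approval_tools=["send_payment"],
)

async for message in interface.process(stream):  # stream: Anthropic SDK event stream
    if message.message_type == "approval_request_message":
        # Surface the pending call to the user instead of executing the tool.
        print(f"approval needed: {message.tool_call.name} {message.tool_call.arguments}")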
letta/interfaces/openai_streaming_interface.py CHANGED
@@ -11,6 +11,7 @@ from letta.llm_api.openai_client import is_openai_reasoning_model
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.schemas.letta_message import (
+    ApprovalRequestMessage,
     AssistantMessage,
     HiddenReasoningMessage,
     LettaMessage,
@@ -43,6 +44,7 @@ class OpenAIStreamingInterface:
         messages: Optional[list] = None,
         tools: Optional[list] = None,
         put_inner_thoughts_in_kwarg: bool = True,
+        requires_approval_tools: list = [],
     ):
         self.use_assistant_message = use_assistant_message
         self.assistant_message_tool_name = DEFAULT_MESSAGE_TOOL
@@ -86,6 +88,8 @@ class OpenAIStreamingInterface:
         self.reasoning_messages = []
         self.emitted_hidden_reasoning = False  # Track if we've emitted hidden reasoning message

+        self.requires_approval_tools = requires_approval_tools
+
     def get_reasoning_content(self) -> list[TextContent | OmittedReasoningContent]:
         content = "".join(self.reasoning_messages).strip()

@@ -162,11 +166,12 @@
         except Exception as e:
             import traceback

-            logger.error("Error processing stream: %s", e, traceback.format_exc())
-            ttft_span.add_event(
-                name="stop_reason",
-                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
-            )
+            logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
+            if ttft_span:
+                ttft_span.add_event(
+                    name="stop_reason",
+                    attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+                )
             yield LettaStopReason(stop_reason=StopReasonType.error)
             raise e
         finally:
@@ -274,16 +279,28 @@
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
                         self.tool_call_name = str(self.function_name_buffer)
-                        tool_call_msg = ToolCallMessage(
-                            id=self.letta_message_id,
-                            date=datetime.now(timezone.utc),
-                            tool_call=ToolCallDelta(
-                                name=self.function_name_buffer,
-                                arguments=None,
-                                tool_call_id=self.function_id_buffer,
-                            ),
-                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                        )
+                        if self.tool_call_name in self.requires_approval_tools:
+                            tool_call_msg = ApprovalRequestMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=self.function_name_buffer,
+                                    arguments=None,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
+                        else:
+                            tool_call_msg = ToolCallMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=self.function_name_buffer,
+                                    arguments=None,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
                         prev_message_type = tool_call_msg.message_type
                         yield tool_call_msg

@@ -404,17 +421,30 @@
                     combined_chunk = self.function_args_buffer + updates_main_json
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
-                    tool_call_msg = ToolCallMessage(
-                        id=self.letta_message_id,
-                        date=datetime.now(timezone.utc),
-                        tool_call=ToolCallDelta(
-                            name=self.function_name_buffer,
-                            arguments=combined_chunk,
-                            tool_call_id=self.function_id_buffer,
-                        ),
-                        # name=name,
-                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                    )
+                    if self.function_name_buffer in self.requires_approval_tools:
+                        tool_call_msg = ApprovalRequestMessage(
+                            id=self.letta_message_id,
+                            date=datetime.now(timezone.utc),
+                            tool_call=ToolCallDelta(
+                                name=self.function_name_buffer,
+                                arguments=combined_chunk,
+                                tool_call_id=self.function_id_buffer,
+                            ),
+                            # name=name,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        )
+                    else:
+                        tool_call_msg = ToolCallMessage(
+                            id=self.letta_message_id,
+                            date=datetime.now(timezone.utc),
+                            tool_call=ToolCallDelta(
+                                name=self.function_name_buffer,
+                                arguments=combined_chunk,
+                                tool_call_id=self.function_id_buffer,
+                            ),
+                            # name=name,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        )
                     prev_message_type = tool_call_msg.message_type
                     yield tool_call_msg
                     # clear buffer
@@ -424,17 +454,30 @@
                     # If there's no buffer to clear, just output a new chunk with new data
                     if prev_message_type and prev_message_type != "tool_call_message":
                         message_index += 1
-                    tool_call_msg = ToolCallMessage(
-                        id=self.letta_message_id,
-                        date=datetime.now(timezone.utc),
-                        tool_call=ToolCallDelta(
-                            name=None,
-                            arguments=updates_main_json,
-                            tool_call_id=self.function_id_buffer,
-                        ),
-                        # name=name,
-                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                    )
+                    if self.function_name_buffer in self.requires_approval_tools:
+                        tool_call_msg = ApprovalRequestMessage(
+                            id=self.letta_message_id,
+                            date=datetime.now(timezone.utc),
+                            tool_call=ToolCallDelta(
+                                name=None,
+                                arguments=updates_main_json,
+                                tool_call_id=self.function_id_buffer,
+                            ),
+                            # name=name,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        )
+                    else:
+                        tool_call_msg = ToolCallMessage(
+                            id=self.letta_message_id,
+                            date=datetime.now(timezone.utc),
+                            tool_call=ToolCallDelta(
+                                name=None,
+                                arguments=updates_main_json,
+                                tool_call_id=self.function_id_buffer,
+                            ),
+                            # name=name,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        )
                     prev_message_type = tool_call_msg.message_type
                     yield tool_call_msg
                     self.function_id_buffer = None
letta/llm_api/google_vertex_client.py CHANGED
@@ -272,7 +272,7 @@ class GoogleVertexClient(LLMClientBase):
         tool_names = []

         contents = self.add_dummy_model_messages(
-            [m.to_google_ai_dict() for m in messages],
+            PydanticMessage.to_google_dicts_from_list(messages),
         )

         request_data = {
letta/llm_api/openai_client.py CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 import os
 from typing import List, Optional

@@ -319,13 +320,53 @@

     @trace_method
     async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
-        """Request embeddings given texts and embedding config"""
+        """Request embeddings given texts and embedding config with chunking and retry logic"""
+        if not inputs:
+            return []
+
         kwargs = self._prepare_client_kwargs_embedding(embedding_config)
         client = AsyncOpenAI(**kwargs)
-        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)

-        # TODO: add total usage
-        return [r.embedding for r in response.data]
+        # track results by original index to maintain order
+        results = [None] * len(inputs)
+
+        # queue of (start_idx, chunk_inputs) to process
+        chunks_to_process = [(i, inputs[i : i + 2048]) for i in range(0, len(inputs), 2048)]
+
+        min_chunk_size = 256
+
+        while chunks_to_process:
+            tasks = []
+            task_metadata = []
+
+            for start_idx, chunk_inputs in chunks_to_process:
+                task = client.embeddings.create(model=embedding_config.embedding_model, input=chunk_inputs)
+                tasks.append(task)
+                task_metadata.append((start_idx, chunk_inputs))
+
+            task_results = await asyncio.gather(*tasks, return_exceptions=True)
+
+            failed_chunks = []
+            for (start_idx, chunk_inputs), result in zip(task_metadata, task_results):
+                if isinstance(result, Exception):
+                    # check if we can retry with smaller chunks
+                    if len(chunk_inputs) > min_chunk_size:
+                        # split chunk in half and queue for retry
+                        mid = len(chunk_inputs) // 2
+                        failed_chunks.append((start_idx, chunk_inputs[:mid]))
+                        failed_chunks.append((start_idx + mid, chunk_inputs[mid:]))
+                    else:
+                        # can't split further, re-raise the error
+                        logger.error(f"Failed to get embeddings for chunk starting at {start_idx} even with minimum size {min_chunk_size}")
+                        raise result
+                else:
+                    embeddings = [r.embedding for r in result.data]
+                    for i, embedding in enumerate(embeddings):
+                        results[start_idx + i] = embedding
+
+            chunks_to_process = failed_chunks
+
+        return results

     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
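The new request_embeddings batches inputs 2048 at a time, fires the batches concurrently, and on failure splits a batch in half and requeues it (down to a floor of 256) so one oversized or rejected batch does not fail the whole request. The index bookkeeping is the subtle part; here is a small standalone model of just that logic, with the network call replaced by an invented flaky() stub:

# Standalone model of the split-and-requeue bookkeeping (no network calls).
def flaky(chunk: list[str]) -> list[str]:
    if len(chunk) > 2:  # pretend batches above 2 items are rejected
        raise RuntimeError("batch too large")
    return [f"emb({x})" for x in chunk]

inputs = ["a", "b", "c", "d", "e"]
results: list[str | None] = [None] * len(inputs)
queue = [(0, inputs)]  # (start index in `inputs`, chunk)
MIN = 1

while queue:
    failed = []
    for start, chunk in queue:
        try:
            for i, emb in enumerate(flaky(chunk)):
                results[start + i] = emb  # write back at the original offset
        except RuntimeError:
            if len(chunk) <= MIN:
                raise
            mid = len(chunk) // 2  # halve and requeue both pieces
            failed += [(start, chunk[:mid]), (start + mid, chunk[mid:])]
    queue = failed

print(results)  # ['emb(a)', 'emb(b)', 'emb(c)', 'emb(d)', 'emb(e)'] -- order preserved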
letta/orm/agent.py CHANGED
@@ -34,7 +34,10 @@ if TYPE_CHECKING:
 class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin, TemplateMixin, AsyncAttrs):
     __tablename__ = "agents"
     __pydantic_model__ = PydanticAgentState
-    __table_args__ = (Index("ix_agents_created_at", "created_at", "id"),)
+    __table_args__ = (
+        Index("ix_agents_created_at", "created_at", "id"),
+        Index("ix_agents_organization_id", "organization_id"),
+    )

     # agent generates its own id
     # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
letta/orm/block.py CHANGED
@@ -24,6 +24,7 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin
     __table_args__ = (
         UniqueConstraint("id", "label", name="unique_block_id_label"),
         Index("created_at_label_idx", "created_at", "label"),
+        Index("ix_block_label", "label"),
     )

     template_name: Mapped[Optional[str]] = mapped_column(
@@ -41,6 +42,7 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin

     # permissions of the agent
     read_only: Mapped[bool] = mapped_column(doc="whether the agent has read-only access to the block", default=False)
+    hidden: Mapped[Optional[bool]] = mapped_column(nullable=True, doc="If set to True, the block will be hidden.")

     # history pointers / locking mechanisms
     current_history_entry_id: Mapped[Optional[str]] = mapped_column(
letta/orm/blocks_agents.py CHANGED
@@ -20,6 +20,7 @@ class BlocksAgents(Base):
         UniqueConstraint("agent_id", "block_id", name="unique_agent_block"),
         Index("ix_blocks_agents_block_label_agent_id", "block_label", "agent_id"),
         Index("ix_blocks_block_label", "block_label"),
+        Index("ix_blocks_agents_block_id", "block_id"),
     )

     # unique agent + block label
letta/orm/group.py CHANGED
@@ -24,6 +24,7 @@ class Group(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateMixin):
     min_message_buffer_length: Mapped[Optional[int]] = mapped_column(nullable=True, doc="")
     turns_counter: Mapped[Optional[int]] = mapped_column(nullable=True, doc="")
     last_processed_message_id: Mapped[Optional[str]] = mapped_column(nullable=True, doc="")
+    hidden: Mapped[Optional[bool]] = mapped_column(nullable=True, doc="If set to True, the group will be hidden.")

     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="groups")
letta/orm/source.py CHANGED
@@ -1,12 +1,13 @@
 from typing import TYPE_CHECKING, Optional

-from sqlalchemy import JSON, Index, UniqueConstraint
+from sqlalchemy import JSON, Enum, Index, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column

 from letta.orm.custom_columns import EmbeddingConfigColumn
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.sqlalchemy_base import SqlalchemyBase
 from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.source import Source as PydanticSource

 if TYPE_CHECKING:
@@ -30,3 +31,9 @@ class Source(SqlalchemyBase, OrganizationMixin):
     instructions: Mapped[str] = mapped_column(nullable=True, doc="instructions for how to use the source")
     embedding_config: Mapped[EmbeddingConfig] = mapped_column(EmbeddingConfigColumn, doc="Configuration settings for embedding.")
     metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True, doc="metadata for the source.")
+    vector_db_provider: Mapped[VectorDBProvider] = mapped_column(
+        Enum(VectorDBProvider),
+        nullable=False,
+        default=VectorDBProvider.NATIVE,
+        doc="The vector database provider used for this source's passages",
+    )
letta/orm/sources_agents.py CHANGED
@@ -1,4 +1,4 @@
-from sqlalchemy import ForeignKey, String
+from sqlalchemy import ForeignKey, Index, String
 from sqlalchemy.orm import Mapped, mapped_column

 from letta.orm.base import Base
@@ -8,6 +8,7 @@ class SourcesAgents(Base):
     """Agents can have zero to many sources"""

     __tablename__ = "sources_agents"
+    __table_args__ = (Index("ix_sources_agents_source_id", "source_id"),)

     agent_id: Mapped[String] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
     source_id: Mapped[String] = mapped_column(String, ForeignKey("sources.id", ondelete="CASCADE"), primary_key=True)
letta/orm/step_metrics.py CHANGED
@@ -43,6 +43,16 @@ class StepMetrics(SqlalchemyBase, ProjectMixin, AgentMixin):
         nullable=True,
         doc="The unique identifier of the job",
     )
+    step_start_ns: Mapped[Optional[int]] = mapped_column(
+        BigInteger,
+        nullable=True,
+        doc="The timestamp of the start of the step in nanoseconds",
+    )
+    llm_request_start_ns: Mapped[Optional[int]] = mapped_column(
+        BigInteger,
+        nullable=True,
+        doc="The timestamp of the start of the LLM request in nanoseconds",
+    )
     llm_request_ns: Mapped[Optional[int]] = mapped_column(
         BigInteger,
         nullable=True,
letta/orm/tools_agents.py CHANGED
@@ -1,4 +1,4 @@
-from sqlalchemy import ForeignKey, String, UniqueConstraint
+from sqlalchemy import ForeignKey, Index, String, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column

 from letta.orm import Base
@@ -8,7 +8,10 @@ class ToolsAgents(Base):
     """Agents can have one or many tools associated with them."""

     __tablename__ = "tools_agents"
-    __table_args__ = (UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),)
+    __table_args__ = (
+        UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),
+        Index("ix_tools_agents_tool_id", "tool_id"),
+    )

     # Each agent must have unique tool names
     agent_id: Mapped[str] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
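The ORM hunks above add single-column indexes on the foreign-key side of several join tables (blocks_agents, sources_agents, tools_agents) plus agents.organization_id, speeding up reverse lookups such as "which agents use this tool". In a deployed database these would normally arrive via an Alembic migration; a hypothetical sketch of the upgrade step for one of them (revision ids and the migration itself are illustrative, not taken from this package):

# Hypothetical Alembic migration for the new tools_agents index.
from alembic import op

revision = "xxxx"        # placeholder revision id
down_revision = "yyyy"   # placeholder parent revision

def upgrade() -> None:
    op.create_index("ix_tools_agents_tool_id", "tools_agents", ["tool_id"])

def downgrade() -> None:
    op.drop_index("ix_tools_agents_tool_id", table_name="tools_agents")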
letta/schemas/block.py CHANGED
@@ -38,6 +38,10 @@ class BaseBlock(LettaBase, validate_assignment=True):
     # metadata
     description: Optional[str] = Field(None, description="Description of the block.")
     metadata: Optional[dict] = Field({}, description="Metadata of the block.")
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the block will be hidden.",
+    )

     # def __len__(self):
     #     return len(self.value)
letta/schemas/enums.py CHANGED
@@ -180,6 +180,7 @@ class VectorDBProvider(str, Enum):

     NATIVE = "native"
     TPUF = "tpuf"
+    PINECONE = "pinecone"


 class TagMatchMode(str, Enum):
letta/schemas/group.py CHANGED
@@ -49,6 +49,10 @@ class Group(GroupBase):
         None,
         description="The desired minimum length of messages in the context window of the convo agent. This is a best effort, and may be off-by-one due to user/assistant interleaving.",
     )
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the group will be hidden.",
+    )

     @property
     def manager_config(self) -> ManagerConfig:
@@ -170,6 +174,10 @@ class GroupCreate(BaseModel):
     manager_config: ManagerConfigUnion = Field(RoundRobinManager(), description="")
     project_id: Optional[str] = Field(None, description="The associated project id.")
     shared_block_ids: List[str] = Field([], description="")
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the group will be hidden.",
+    )


 class InternalTemplateGroupCreate(GroupCreate):
letta/schemas/letta_message.py CHANGED
@@ -265,7 +265,7 @@ class ApprovalRequestMessage(LettaMessage):
     message_type: Literal[MessageType.approval_request_message] = Field(
         default=MessageType.approval_request_message, description="The type of the message."
     )
-    tool_call: ToolCall = Field(..., description="The tool call that has been requested by the llm to run")
+    tool_call: Union[ToolCall, ToolCallDelta] = Field(..., description="The tool call that has been requested by the llm to run")


 class ApprovalResponseMessage(LettaMessage):
letta/schemas/letta_request.py CHANGED
@@ -60,7 +60,7 @@ class LettaStreamingRequest(LettaRequest):
         description="Flag to determine if individual tokens should be streamed, rather than streaming per step.",
     )
     include_pings: bool = Field(
-        default=False,
+        default=True,
         description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.",
     )
     background: bool = Field(
@@ -94,7 +94,7 @@ class RetrieveStreamRequest(BaseModel):
         0, description="Sequence id to use as a cursor for pagination. Response will start streaming after this chunk sequence id"
     )
     include_pings: Optional[bool] = Field(
-        default=False,
+        default=True,
         description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.",
     )
     poll_interval: Optional[float] = Field(
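With include_pings now defaulting to True, streaming clients receive periodic keepalive events unless they opt out, so consumers should be prepared to skip them. A hedged sketch of a tolerant consumer loop; the assumption that keepalives parse to a dict whose "message_type" is "ping" should be verified against the actual stream payloads:

import json

def iter_letta_events(sse_lines):
    # Yields parsed events from raw SSE lines, dropping keepalive pings.
    for line in sse_lines:
        if not line.startswith("data:"):
            continue
        payload = line[len("data:"):].strip()
        if payload == "[DONE]":
            break
        event = json.loads(payload)
        if event.get("message_type") == "ping":
            continue  # keepalive only; nothing to render
        yield event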
letta/schemas/mcp.py CHANGED
@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Union

 from pydantic import Field

@@ -175,3 +175,11 @@ class MCPOAuthSessionUpdate(BaseMCPOAuth):
     client_secret: Optional[str] = Field(None, description="OAuth client secret")
     redirect_uri: Optional[str] = Field(None, description="OAuth redirect URI")
     status: Optional[OAuthSessionStatus] = Field(None, description="Session status")
+
+
+class MCPServerResyncResult(LettaBase):
+    """Result of resyncing MCP server tools."""
+
+    deleted: List[str] = Field(default_factory=list, description="List of deleted tool names")
+    updated: List[str] = Field(default_factory=list, description="List of updated tool names")
+    added: List[str] = Field(default_factory=list, description="List of added tool names")
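MCPServerResyncResult gives the resync endpoint a structured diff of an MCP server's tool set. Assuming LettaBase exposes the usual pydantic v2 API, a result built from the fields above might serialize like this (tool names invented for illustration):

result = MCPServerResyncResult(
    deleted=["old_search"],
    updated=["fetch_page"],
    added=["fetch_page_v2"],
)
print(result.model_dump_json())
# {"deleted":["old_search"],"updated":["fetch_page"],"added":["fetch_page_v2"]}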