letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250910104051__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +111 -0
  3. letta/adapters/letta_llm_stream_adapter.py +169 -0
  4. letta/agents/base_agent.py +4 -1
  5. letta/agents/base_agent_v2.py +68 -0
  6. letta/agents/helpers.py +3 -5
  7. letta/agents/letta_agent.py +23 -12
  8. letta/agents/letta_agent_v2.py +1220 -0
  9. letta/agents/voice_agent.py +2 -1
  10. letta/constants.py +1 -1
  11. letta/errors.py +12 -0
  12. letta/functions/function_sets/base.py +53 -12
  13. letta/functions/schema_generator.py +1 -1
  14. letta/groups/sleeptime_multi_agent_v3.py +231 -0
  15. letta/helpers/tool_rule_solver.py +4 -0
  16. letta/helpers/tpuf_client.py +607 -34
  17. letta/interfaces/anthropic_streaming_interface.py +64 -24
  18. letta/interfaces/openai_streaming_interface.py +80 -37
  19. letta/llm_api/openai_client.py +45 -4
  20. letta/orm/block.py +1 -0
  21. letta/orm/group.py +1 -0
  22. letta/orm/source.py +8 -1
  23. letta/orm/step_metrics.py +10 -0
  24. letta/schemas/block.py +4 -0
  25. letta/schemas/enums.py +1 -0
  26. letta/schemas/group.py +8 -0
  27. letta/schemas/letta_message.py +1 -1
  28. letta/schemas/letta_request.py +2 -2
  29. letta/schemas/mcp.py +9 -1
  30. letta/schemas/message.py +23 -0
  31. letta/schemas/providers/ollama.py +1 -1
  32. letta/schemas/providers.py +1 -2
  33. letta/schemas/source.py +6 -0
  34. letta/schemas/step_metrics.py +2 -0
  35. letta/server/rest_api/routers/v1/__init__.py +2 -0
  36. letta/server/rest_api/routers/v1/agents.py +100 -5
  37. letta/server/rest_api/routers/v1/blocks.py +6 -0
  38. letta/server/rest_api/routers/v1/folders.py +23 -5
  39. letta/server/rest_api/routers/v1/groups.py +6 -0
  40. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  41. letta/server/rest_api/routers/v1/messages.py +14 -19
  42. letta/server/rest_api/routers/v1/runs.py +43 -28
  43. letta/server/rest_api/routers/v1/sources.py +23 -5
  44. letta/server/rest_api/routers/v1/tools.py +42 -0
  45. letta/server/rest_api/streaming_response.py +9 -1
  46. letta/server/server.py +2 -1
  47. letta/services/agent_manager.py +39 -59
  48. letta/services/agent_serialization_manager.py +22 -8
  49. letta/services/archive_manager.py +60 -9
  50. letta/services/block_manager.py +5 -0
  51. letta/services/file_processor/embedder/base_embedder.py +5 -0
  52. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  53. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  54. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  55. letta/services/file_processor/file_processor.py +9 -7
  56. letta/services/group_manager.py +74 -11
  57. letta/services/mcp_manager.py +132 -26
  58. letta/services/message_manager.py +229 -125
  59. letta/services/passage_manager.py +2 -1
  60. letta/services/source_manager.py +23 -1
  61. letta/services/summarizer/summarizer.py +2 -0
  62. letta/services/tool_executor/core_tool_executor.py +2 -120
  63. letta/services/tool_executor/files_tool_executor.py +133 -8
  64. letta/settings.py +6 -0
  65. letta/utils.py +34 -1
  66. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/METADATA +2 -2
  67. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/RECORD +70 -63
  68. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/WHEEL +0 -0
  69. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/entry_points.txt +0 -0
  70. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/licenses/LICENSE +0 -0
letta/interfaces/anthropic_streaming_interface.py CHANGED
@@ -28,6 +28,7 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.schemas.letta_message import (
+    ApprovalRequestMessage,
     AssistantMessage,
     HiddenReasoningMessage,
     LettaMessage,
@@ -59,7 +60,12 @@ class AnthropicStreamingInterface:
     and detection of tool call events.
     """
 
-    def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
+    def __init__(
+        self,
+        use_assistant_message: bool = False,
+        put_inner_thoughts_in_kwarg: bool = False,
+        requires_approval_tools: list = [],
+    ):
         self.json_parser: JSONParser = PydanticJSONParser()
         self.use_assistant_message = use_assistant_message
 
@@ -90,6 +96,8 @@
         # Buffer to handle partial XML tags across chunks
         self.partial_tag_buffer = ""
 
+        self.requires_approval_tools = requires_approval_tools
+
     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
         if not self.tool_call_name:
@@ -218,11 +226,12 @@
         except Exception as e:
             import traceback
 
-            logger.error("Error processing stream: %s", e, traceback.format_exc())
-            ttft_span.add_event(
-                name="stop_reason",
-                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
-            )
+            logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
+            if ttft_span:
+                ttft_span.add_event(
+                    name="stop_reason",
+                    attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+                )
             yield LettaStopReason(stop_reason=StopReasonType.error)
             raise e
         finally:
@@ -256,13 +265,15 @@
                 self.inner_thoughts_complete = False
 
                 if not self.use_assistant_message:
-                    # Buffer the initial tool call message instead of yielding immediately
-                    tool_call_msg = ToolCallMessage(
-                        id=self.letta_message_id,
-                        tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
-                        date=datetime.now(timezone.utc).isoformat(),
-                    )
-                    self.tool_call_buffer.append(tool_call_msg)
+                    # Only buffer the initial tool call message if it doesn't require approval
+                    # For approval-required tools, we'll create the ApprovalRequestMessage later
+                    if self.tool_call_name not in self.requires_approval_tools:
+                        tool_call_msg = ToolCallMessage(
+                            id=self.letta_message_id,
+                            tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
+                            date=datetime.now(timezone.utc).isoformat(),
+                        )
+                        self.tool_call_buffer.append(tool_call_msg)
             elif isinstance(content, BetaThinkingBlock):
                 self.anthropic_mode = EventMode.THINKING
                 # TODO: Can capture signature, etc.
@@ -353,11 +364,36 @@
                 prev_message_type = reasoning_message.message_type
                 yield reasoning_message
 
-                # Check if inner thoughts are complete - if so, flush the buffer
+                # Check if inner thoughts are complete - if so, flush the buffer or create approval message
                 if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
                     self.inner_thoughts_complete = True
-                    # Flush all buffered tool call messages
-                    if len(self.tool_call_buffer) > 0:
+
+                    # Check if this tool requires approval
+                    if self.tool_call_name in self.requires_approval_tools:
+                        # Create ApprovalRequestMessage directly (buffer should be empty)
+                        if prev_message_type and prev_message_type != "approval_request_message":
+                            message_index += 1
+
+                        # Strip out inner thoughts from arguments
+                        tool_call_args = self.accumulated_tool_call_args
+                        if current_inner_thoughts:
+                            tool_call_args = tool_call_args.replace(f'"{INNER_THOUGHTS_KWARG}": "{current_inner_thoughts}"', "")
+
+                        approval_msg = ApprovalRequestMessage(
+                            id=self.letta_message_id,
+                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            date=datetime.now(timezone.utc).isoformat(),
+                            name=self.tool_call_name,
+                            tool_call=ToolCallDelta(
+                                name=self.tool_call_name,
+                                tool_call_id=self.tool_call_id,
+                                arguments=tool_call_args,
+                            ),
+                        )
+                        prev_message_type = approval_msg.message_type
+                        yield approval_msg
+                    elif len(self.tool_call_buffer) > 0:
+                        # Flush buffered tool call messages for non-approval tools
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
 
@@ -371,9 +407,6 @@
                             id=self.tool_call_buffer[0].id,
                             otid=Message.generate_otid_from_id(self.tool_call_buffer[0].id, message_index),
                             date=self.tool_call_buffer[0].date,
-                            name=self.tool_call_buffer[0].name,
-                            sender_id=self.tool_call_buffer[0].sender_id,
-                            step_id=self.tool_call_buffer[0].step_id,
                             tool_call=ToolCallDelta(
                                 name=self.tool_call_name,
                                 tool_call_id=self.tool_call_id,
@@ -404,11 +437,18 @@
                         yield assistant_msg
                     else:
                         # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
-                        tool_call_msg = ToolCallMessage(
-                            id=self.letta_message_id,
-                            tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
-                            date=datetime.now(timezone.utc).isoformat(),
-                        )
+                        if self.tool_call_name in self.requires_approval_tools:
+                            tool_call_msg = ApprovalRequestMessage(
+                                id=self.letta_message_id,
+                                tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                                date=datetime.now(timezone.utc).isoformat(),
+                            )
+                        else:
+                            tool_call_msg = ToolCallMessage(
+                                id=self.letta_message_id,
+                                tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id, arguments=delta.partial_json),
+                                date=datetime.now(timezone.utc).isoformat(),
+                            )
                         if self.inner_thoughts_complete:
                             if prev_message_type and prev_message_type != "tool_call_message":
                                 message_index += 1
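Note: both streaming interfaces in this diff apply the same gating rule: when the streamed tool name appears in `requires_approval_tools`, an `ApprovalRequestMessage` is emitted in place of a `ToolCallMessage`. A minimal sketch of that rule in isolation, using hypothetical stand-in dataclasses rather than Letta's real message schemas:

```python
# Sketch of the approval-gating rule added to both streaming interfaces.
# These dataclasses are hypothetical stand-ins for Letta's real
# ToolCallMessage / ApprovalRequestMessage schemas.
from dataclasses import dataclass


@dataclass
class ToolCallMessage:
    name: str
    arguments: str


@dataclass
class ApprovalRequestMessage:
    name: str
    arguments: str


def build_message(name: str, arguments: str, requires_approval_tools: list[str]):
    # Same membership test the diff adds: approval-gated tools surface
    # as approval requests instead of plain tool calls.
    if name in requires_approval_tools:
        return ApprovalRequestMessage(name=name, arguments=arguments)
    return ToolCallMessage(name=name, arguments=arguments)


print(type(build_message("send_email", "{}", ["send_email"])).__name__)  # ApprovalRequestMessage
print(type(build_message("web_search", "{}", ["send_email"])).__name__)  # ToolCallMessage
```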
letta/interfaces/openai_streaming_interface.py CHANGED
@@ -11,6 +11,7 @@ from letta.llm_api.openai_client import is_openai_reasoning_model
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.schemas.letta_message import (
+    ApprovalRequestMessage,
     AssistantMessage,
     HiddenReasoningMessage,
     LettaMessage,
@@ -43,6 +44,7 @@ class OpenAIStreamingInterface:
         messages: Optional[list] = None,
         tools: Optional[list] = None,
         put_inner_thoughts_in_kwarg: bool = True,
+        requires_approval_tools: list = [],
     ):
         self.use_assistant_message = use_assistant_message
         self.assistant_message_tool_name = DEFAULT_MESSAGE_TOOL
@@ -86,6 +88,8 @@
         self.reasoning_messages = []
         self.emitted_hidden_reasoning = False  # Track if we've emitted hidden reasoning message
 
+        self.requires_approval_tools = requires_approval_tools
+
     def get_reasoning_content(self) -> list[TextContent | OmittedReasoningContent]:
         content = "".join(self.reasoning_messages).strip()
 
@@ -162,11 +166,12 @@
         except Exception as e:
             import traceback
 
-            logger.error("Error processing stream: %s", e, traceback.format_exc())
-            ttft_span.add_event(
-                name="stop_reason",
-                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
-            )
+            logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
+            if ttft_span:
+                ttft_span.add_event(
+                    name="stop_reason",
+                    attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
+                )
             yield LettaStopReason(stop_reason=StopReasonType.error)
             raise e
         finally:
@@ -274,16 +279,28 @@
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
                         self.tool_call_name = str(self.function_name_buffer)
-                        tool_call_msg = ToolCallMessage(
-                            id=self.letta_message_id,
-                            date=datetime.now(timezone.utc),
-                            tool_call=ToolCallDelta(
-                                name=self.function_name_buffer,
-                                arguments=None,
-                                tool_call_id=self.function_id_buffer,
-                            ),
-                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                        )
+                        if self.tool_call_name in self.requires_approval_tools:
+                            tool_call_msg = ApprovalRequestMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=self.function_name_buffer,
+                                    arguments=None,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
+                        else:
+                            tool_call_msg = ToolCallMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=self.function_name_buffer,
+                                    arguments=None,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
                         prev_message_type = tool_call_msg.message_type
                         yield tool_call_msg
 
@@ -404,17 +421,30 @@
                             combined_chunk = self.function_args_buffer + updates_main_json
                             if prev_message_type and prev_message_type != "tool_call_message":
                                 message_index += 1
-                            tool_call_msg = ToolCallMessage(
-                                id=self.letta_message_id,
-                                date=datetime.now(timezone.utc),
-                                tool_call=ToolCallDelta(
-                                    name=self.function_name_buffer,
-                                    arguments=combined_chunk,
-                                    tool_call_id=self.function_id_buffer,
-                                ),
-                                # name=name,
-                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                            )
+                            if self.function_name_buffer in self.requires_approval_tools:
+                                tool_call_msg = ApprovalRequestMessage(
+                                    id=self.letta_message_id,
+                                    date=datetime.now(timezone.utc),
+                                    tool_call=ToolCallDelta(
+                                        name=self.function_name_buffer,
+                                        arguments=combined_chunk,
+                                        tool_call_id=self.function_id_buffer,
+                                    ),
+                                    # name=name,
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                )
+                            else:
+                                tool_call_msg = ToolCallMessage(
+                                    id=self.letta_message_id,
+                                    date=datetime.now(timezone.utc),
+                                    tool_call=ToolCallDelta(
+                                        name=self.function_name_buffer,
+                                        arguments=combined_chunk,
+                                        tool_call_id=self.function_id_buffer,
+                                    ),
+                                    # name=name,
+                                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                                )
                             prev_message_type = tool_call_msg.message_type
                             yield tool_call_msg
                             # clear buffer
@@ -424,17 +454,30 @@
                         # If there's no buffer to clear, just output a new chunk with new data
                         if prev_message_type and prev_message_type != "tool_call_message":
                             message_index += 1
-                        tool_call_msg = ToolCallMessage(
-                            id=self.letta_message_id,
-                            date=datetime.now(timezone.utc),
-                            tool_call=ToolCallDelta(
-                                name=None,
-                                arguments=updates_main_json,
-                                tool_call_id=self.function_id_buffer,
-                            ),
-                            # name=name,
-                            otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                        )
+                        if self.function_name_buffer in self.requires_approval_tools:
+                            tool_call_msg = ApprovalRequestMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=None,
+                                    arguments=updates_main_json,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                # name=name,
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
+                        else:
+                            tool_call_msg = ToolCallMessage(
+                                id=self.letta_message_id,
+                                date=datetime.now(timezone.utc),
+                                tool_call=ToolCallDelta(
+                                    name=None,
+                                    arguments=updates_main_json,
+                                    tool_call_id=self.function_id_buffer,
+                                ),
+                                # name=name,
+                                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                            )
                         prev_message_type = tool_call_msg.message_type
                         yield tool_call_msg
                         self.function_id_buffer = None
letta/llm_api/openai_client.py CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 import os
 from typing import List, Optional
 
@@ -319,13 +320,53 @@ class OpenAIClient(LLMClientBase):
 
     @trace_method
     async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
-        """Request embeddings given texts and embedding config"""
+        """Request embeddings given texts and embedding config with chunking and retry logic"""
+        if not inputs:
+            return []
+
         kwargs = self._prepare_client_kwargs_embedding(embedding_config)
         client = AsyncOpenAI(**kwargs)
-        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)
 
-        # TODO: add total usage
-        return [r.embedding for r in response.data]
+        # track results by original index to maintain order
+        results = [None] * len(inputs)
+
+        # queue of (start_idx, chunk_inputs) to process
+        chunks_to_process = [(i, inputs[i : i + 2048]) for i in range(0, len(inputs), 2048)]
+
+        min_chunk_size = 256
+
+        while chunks_to_process:
+            tasks = []
+            task_metadata = []
+
+            for start_idx, chunk_inputs in chunks_to_process:
+                task = client.embeddings.create(model=embedding_config.embedding_model, input=chunk_inputs)
+                tasks.append(task)
+                task_metadata.append((start_idx, chunk_inputs))
+
+            task_results = await asyncio.gather(*tasks, return_exceptions=True)
+
+            failed_chunks = []
+            for (start_idx, chunk_inputs), result in zip(task_metadata, task_results):
+                if isinstance(result, Exception):
+                    # check if we can retry with smaller chunks
+                    if len(chunk_inputs) > min_chunk_size:
+                        # split chunk in half and queue for retry
+                        mid = len(chunk_inputs) // 2
+                        failed_chunks.append((start_idx, chunk_inputs[:mid]))
+                        failed_chunks.append((start_idx + mid, chunk_inputs[mid:]))
+                    else:
+                        # can't split further, re-raise the error
+                        logger.error(f"Failed to get embeddings for chunk starting at {start_idx} even with minimum size {min_chunk_size}")
+                        raise result
+                else:
+                    embeddings = [r.embedding for r in result.data]
+                    for i, embedding in enumerate(embeddings):
+                        results[start_idx + i] = embedding
+
+            chunks_to_process = failed_chunks
+
+        return results
 
     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
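Note: the rewritten `request_embeddings` fans inputs out in chunks of 2048, and any chunk that fails is requeued as two halves until it reaches the 256-input floor, where the error is re-raised. The requeue arithmetic preserves each input's original start index, which is what keeps the output order stable; a minimal sketch of just that step (no network calls, and the small floor here is illustrative):

```python
# Sketch of the halving-retry bookkeeping from request_embeddings: a failed
# chunk (start_idx, items) is replaced by its two halves, each keeping its
# original start index. The diff uses a floor of 256; 4 here keeps the
# example small.
MIN_CHUNK_SIZE = 4


def split_failed_chunk(start_idx: int, chunk: list[str]) -> list[tuple[int, list[str]]]:
    if len(chunk) <= MIN_CHUNK_SIZE:
        # Mirrors the diff: below the floor, the original error is re-raised.
        raise RuntimeError(f"chunk at {start_idx} failed even at minimum size")
    mid = len(chunk) // 2
    return [(start_idx, chunk[:mid]), (start_idx + mid, chunk[mid:])]


inputs = [f"doc-{i}" for i in range(10)]
# If the whole batch fails once, it is requeued as two indexed halves.
print(split_failed_chunk(0, inputs))
# [(0, ['doc-0' ... 'doc-4']), (5, ['doc-5' ... 'doc-9'])]
```

Because embeddings are written back into a preallocated list by `start_idx`, the results come out in input order regardless of how chunks were split or retried.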
letta/orm/block.py CHANGED
@@ -41,6 +41,7 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin
 
     # permissions of the agent
     read_only: Mapped[bool] = mapped_column(doc="whether the agent has read-only access to the block", default=False)
+    hidden: Mapped[Optional[bool]] = mapped_column(nullable=True, doc="If set to True, the block will be hidden.")
 
     # history pointers / locking mechanisms
     current_history_entry_id: Mapped[Optional[str]] = mapped_column(
letta/orm/group.py CHANGED
@@ -24,6 +24,7 @@ class Group(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateMixin):
     min_message_buffer_length: Mapped[Optional[int]] = mapped_column(nullable=True, doc="")
     turns_counter: Mapped[Optional[int]] = mapped_column(nullable=True, doc="")
     last_processed_message_id: Mapped[Optional[str]] = mapped_column(nullable=True, doc="")
+    hidden: Mapped[Optional[bool]] = mapped_column(nullable=True, doc="If set to True, the group will be hidden.")
 
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="groups")
letta/orm/source.py CHANGED
@@ -1,12 +1,13 @@
 from typing import TYPE_CHECKING, Optional
 
-from sqlalchemy import JSON, Index, UniqueConstraint
+from sqlalchemy import JSON, Enum, Index, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column
 
 from letta.orm.custom_columns import EmbeddingConfigColumn
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.sqlalchemy_base import SqlalchemyBase
 from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.source import Source as PydanticSource
 
 if TYPE_CHECKING:
@@ -30,3 +31,9 @@ class Source(SqlalchemyBase, OrganizationMixin):
     instructions: Mapped[str] = mapped_column(nullable=True, doc="instructions for how to use the source")
     embedding_config: Mapped[EmbeddingConfig] = mapped_column(EmbeddingConfigColumn, doc="Configuration settings for embedding.")
     metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True, doc="metadata for the source.")
+    vector_db_provider: Mapped[VectorDBProvider] = mapped_column(
+        Enum(VectorDBProvider),
+        nullable=False,
+        default=VectorDBProvider.NATIVE,
+        doc="The vector database provider used for this source's passages",
+    )
letta/orm/step_metrics.py CHANGED
@@ -43,6 +43,16 @@ class StepMetrics(SqlalchemyBase, ProjectMixin, AgentMixin):
         nullable=True,
         doc="The unique identifier of the job",
     )
+    step_start_ns: Mapped[Optional[int]] = mapped_column(
+        BigInteger,
+        nullable=True,
+        doc="The timestamp of the start of the step in nanoseconds",
+    )
+    llm_request_start_ns: Mapped[Optional[int]] = mapped_column(
+        BigInteger,
+        nullable=True,
+        doc="The timestamp of the start of the LLM request in nanoseconds",
+    )
     llm_request_ns: Mapped[Optional[int]] = mapped_column(
         BigInteger,
         nullable=True,
letta/schemas/block.py CHANGED
@@ -38,6 +38,10 @@ class BaseBlock(LettaBase, validate_assignment=True):
     # metadata
     description: Optional[str] = Field(None, description="Description of the block.")
     metadata: Optional[dict] = Field({}, description="Metadata of the block.")
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the block will be hidden.",
+    )
 
     # def __len__(self):
     #     return len(self.value)
letta/schemas/enums.py CHANGED
@@ -180,6 +180,7 @@ class VectorDBProvider(str, Enum):
 
     NATIVE = "native"
     TPUF = "tpuf"
+    PINECONE = "pinecone"
 
 
 class TagMatchMode(str, Enum):
letta/schemas/group.py CHANGED
@@ -49,6 +49,10 @@ class Group(GroupBase):
         None,
         description="The desired minimum length of messages in the context window of the convo agent. This is a best effort, and may be off-by-one due to user/assistant interleaving.",
     )
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the group will be hidden.",
+    )
 
     @property
     def manager_config(self) -> ManagerConfig:
@@ -170,6 +174,10 @@
     manager_config: ManagerConfigUnion = Field(RoundRobinManager(), description="")
     project_id: Optional[str] = Field(None, description="The associated project id.")
     shared_block_ids: List[str] = Field([], description="")
+    hidden: Optional[bool] = Field(
+        None,
+        description="If set to True, the group will be hidden.",
+    )
 
 
 class InternalTemplateGroupCreate(GroupCreate):
letta/schemas/letta_message.py CHANGED
@@ -265,7 +265,7 @@ class ApprovalRequestMessage(LettaMessage):
     message_type: Literal[MessageType.approval_request_message] = Field(
         default=MessageType.approval_request_message, description="The type of the message."
     )
-    tool_call: ToolCall = Field(..., description="The tool call that has been requested by the llm to run")
+    tool_call: Union[ToolCall, ToolCallDelta] = Field(..., description="The tool call that has been requested by the llm to run")
 
 
 class ApprovalResponseMessage(LettaMessage):
letta/schemas/letta_request.py CHANGED
@@ -60,7 +60,7 @@ class LettaStreamingRequest(LettaRequest):
         description="Flag to determine if individual tokens should be streamed, rather than streaming per step.",
     )
     include_pings: bool = Field(
-        default=False,
+        default=True,
         description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.",
     )
     background: bool = Field(
@@ -94,7 +94,7 @@ class RetrieveStreamRequest(BaseModel):
         0, description="Sequence id to use as a cursor for pagination. Response will start streaming after this chunk sequence id"
     )
     include_pings: Optional[bool] = Field(
-        default=False,
+        default=True,
         description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.",
    )
    poll_interval: Optional[float] = Field(
letta/schemas/mcp.py CHANGED
@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 from pydantic import Field
 
@@ -175,3 +175,11 @@ class MCPOAuthSessionUpdate(BaseMCPOAuth):
     client_secret: Optional[str] = Field(None, description="OAuth client secret")
     redirect_uri: Optional[str] = Field(None, description="OAuth redirect URI")
     status: Optional[OAuthSessionStatus] = Field(None, description="Session status")
+
+
+class MCPServerResyncResult(LettaBase):
+    """Result of resyncing MCP server tools."""
+
+    deleted: List[str] = Field(default_factory=list, description="List of deleted tool names")
+    updated: List[str] = Field(default_factory=list, description="List of updated tool names")
+    added: List[str] = Field(default_factory=list, description="List of added tool names")
letta/schemas/message.py CHANGED
@@ -1187,3 +1187,26 @@
     stdout: Optional[List[str]] = Field(default=None, description="Captured stdout (e.g. prints, logs) from the tool invocation")
     stderr: Optional[List[str]] = Field(default=None, description="Captured stderr from the tool invocation")
     # func_return: Optional[Any] = Field(None, description="The function return object")
+
+
+class MessageSearchRequest(BaseModel):
+    """Request model for searching messages across the organization"""
+
+    query: Optional[str] = Field(None, description="Text query for full-text search")
+    search_mode: Literal["vector", "fts", "hybrid"] = Field("hybrid", description="Search mode to use")
+    roles: Optional[List[MessageRole]] = Field(None, description="Filter messages by role")
+    project_id: Optional[str] = Field(None, description="Filter messages by project ID")
+    template_id: Optional[str] = Field(None, description="Filter messages by template ID")
+    limit: int = Field(50, description="Maximum number of results to return", ge=1, le=100)
+    start_date: Optional[datetime] = Field(None, description="Filter messages created after this date")
+    end_date: Optional[datetime] = Field(None, description="Filter messages created on or before this date")
+
+
+class MessageSearchResult(BaseModel):
+    """Result from a message search operation with scoring details."""
+
+    embedded_text: str = Field(..., description="The embedded content (LLM-friendly)")
+    message: Message = Field(..., description="The raw message object")
+    fts_rank: Optional[int] = Field(None, description="Full-text search rank position if FTS was used")
+    vector_rank: Optional[int] = Field(None, description="Vector search rank position if vector search was used")
+    rrf_score: float = Field(..., description="Reciprocal Rank Fusion combined score")
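Note: `MessageSearchResult` reports per-backend ranks plus a fused `rrf_score`, but the fusion itself is not shown in this diff. Reciprocal Rank Fusion is conventionally defined as the sum of 1 / (k + rank) over the result lists that contain the item; a sketch under that assumption (k = 60 is the common default, not confirmed here):

```python
# Hedged sketch of Reciprocal Rank Fusion as conventionally defined:
# score = sum over result lists of 1 / (k + rank). Whether Letta uses
# this exact k or rank convention is not visible in the diff.
from typing import Optional


def rrf_score(fts_rank: Optional[int], vector_rank: Optional[int], k: int = 60) -> float:
    score = 0.0
    for rank in (fts_rank, vector_rank):
        if rank is not None:
            score += 1.0 / (k + rank)
    return score


print(rrf_score(fts_rank=1, vector_rank=3))     # high in both lists -> largest score
print(rrf_score(fts_rank=None, vector_rank=1))  # vector-only hit
```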
letta/schemas/providers/ollama.py CHANGED
@@ -3,7 +3,7 @@ from typing import Literal
 import aiohttp
 from pydantic import Field
 
-from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_DIM, OLLAMA_API_PREFIX
+from letta.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_CHUNK_SIZE, DEFAULT_EMBEDDING_DIM, OLLAMA_API_PREFIX
 from letta.log import get_logger
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderCategory, ProviderType
letta/schemas/providers.py CHANGED
@@ -777,7 +777,6 @@ class AnthropicProvider(Provider):
 
         configs = []
         for model in models:
-
             if model["type"] != "model":
                 continue
 
@@ -1069,7 +1068,7 @@ class GroqProvider(OpenAIProvider):
         response = openai_get_model_list(self.base_url, api_key=self.api_key)
         configs = []
         for model in response["data"]:
-            if not "context_window" in model:
+            if "context_window" not in model:
                 continue
             configs.append(
                 LLMConfig(
letta/schemas/source.py CHANGED
@@ -3,7 +3,9 @@ from typing import Optional
 
 from pydantic import Field
 
+from letta.helpers.tpuf_client import should_use_tpuf
 from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.letta_base import LettaBase
 
 
@@ -40,6 +42,10 @@ class Source(BaseSource):
     metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="Metadata associated with the source.")
 
     # metadata fields
+    vector_db_provider: VectorDBProvider = Field(
+        default=VectorDBProvider.NATIVE,
+        description="The vector database provider used for this source's passages",
+    )
     created_by_id: Optional[str] = Field(None, description="The id of the user that made this Tool.")
     last_updated_by_id: Optional[str] = Field(None, description="The id of the user that made this Tool.")
     created_at: Optional[datetime] = Field(None, description="The timestamp when the source was created.")
letta/schemas/step_metrics.py CHANGED
@@ -15,6 +15,8 @@ class StepMetrics(StepMetricsBase):
     provider_id: Optional[str] = Field(None, description="The unique identifier of the provider.")
     job_id: Optional[str] = Field(None, description="The unique identifier of the job.")
     agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
+    step_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the step in nanoseconds.")
+    llm_request_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the llm request in nanoseconds.")
     llm_request_ns: Optional[int] = Field(None, description="Time spent on LLM requests in nanoseconds.")
     tool_execution_ns: Optional[int] = Field(None, description="Time spent on tool execution in nanoseconds.")
     step_ns: Optional[int] = Field(None, description="Total time for the step in nanoseconds.")
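Note: the diff adds storage for these nanosecond timings but not the call sites that populate them. A hedged sketch of one standard way to record such fields with a monotonic clock (the variable names mirror the schema; the timing pattern itself is an assumption):

```python
# Hypothetical illustration of populating the new timing fields using a
# monotonic nanosecond clock; Letta's actual call sites are not in this diff.
import time

step_start_ns = time.perf_counter_ns()

llm_request_start_ns = time.perf_counter_ns()
# ... perform the LLM request here ...
llm_request_ns = time.perf_counter_ns() - llm_request_start_ns

# ... execute tools, persist messages, etc. ...
step_ns = time.perf_counter_ns() - step_start_ns
```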
letta/server/rest_api/routers/v1/__init__.py CHANGED
@@ -5,6 +5,7 @@ from letta.server.rest_api.routers.v1.folders import router as folders_router
 from letta.server.rest_api.routers.v1.groups import router as groups_router
 from letta.server.rest_api.routers.v1.health import router as health_router
 from letta.server.rest_api.routers.v1.identities import router as identities_router
+from letta.server.rest_api.routers.v1.internal_templates import router as internal_templates_router
 from letta.server.rest_api.routers.v1.jobs import router as jobs_router
 from letta.server.rest_api.routers.v1.llms import router as llm_router
 from letta.server.rest_api.routers.v1.messages import router as messages_router
@@ -25,6 +26,7 @@ ROUTERS = [
     agents_router,
     groups_router,
     identities_router,
+    internal_templates_router,
     llm_router,
     blocks_router,
     jobs_router,