letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +113 -0
  3. letta/adapters/letta_llm_stream_adapter.py +171 -0
  4. letta/agents/agent_loop.py +23 -0
  5. letta/agents/base_agent.py +4 -1
  6. letta/agents/base_agent_v2.py +68 -0
  7. letta/agents/helpers.py +3 -5
  8. letta/agents/letta_agent.py +23 -12
  9. letta/agents/letta_agent_v2.py +1221 -0
  10. letta/agents/voice_agent.py +2 -1
  11. letta/constants.py +1 -1
  12. letta/errors.py +12 -0
  13. letta/functions/function_sets/base.py +53 -12
  14. letta/functions/helpers.py +3 -2
  15. letta/functions/schema_generator.py +1 -1
  16. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  17. letta/groups/sleeptime_multi_agent_v3.py +233 -0
  18. letta/helpers/tool_rule_solver.py +4 -0
  19. letta/helpers/tpuf_client.py +607 -34
  20. letta/interfaces/anthropic_streaming_interface.py +74 -30
  21. letta/interfaces/openai_streaming_interface.py +80 -37
  22. letta/llm_api/google_vertex_client.py +1 -1
  23. letta/llm_api/openai_client.py +45 -4
  24. letta/orm/agent.py +4 -1
  25. letta/orm/block.py +2 -0
  26. letta/orm/blocks_agents.py +1 -0
  27. letta/orm/group.py +1 -0
  28. letta/orm/source.py +8 -1
  29. letta/orm/sources_agents.py +2 -1
  30. letta/orm/step_metrics.py +10 -0
  31. letta/orm/tools_agents.py +5 -2
  32. letta/schemas/block.py +4 -0
  33. letta/schemas/enums.py +1 -0
  34. letta/schemas/group.py +8 -0
  35. letta/schemas/letta_message.py +1 -1
  36. letta/schemas/letta_request.py +2 -2
  37. letta/schemas/mcp.py +9 -1
  38. letta/schemas/message.py +42 -2
  39. letta/schemas/providers/ollama.py +1 -1
  40. letta/schemas/providers.py +1 -2
  41. letta/schemas/source.py +6 -0
  42. letta/schemas/step_metrics.py +2 -0
  43. letta/server/rest_api/interface.py +34 -2
  44. letta/server/rest_api/json_parser.py +2 -0
  45. letta/server/rest_api/redis_stream_manager.py +2 -1
  46. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  47. letta/server/rest_api/routers/v1/__init__.py +2 -0
  48. letta/server/rest_api/routers/v1/agents.py +132 -170
  49. letta/server/rest_api/routers/v1/blocks.py +6 -0
  50. letta/server/rest_api/routers/v1/folders.py +25 -7
  51. letta/server/rest_api/routers/v1/groups.py +6 -0
  52. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  53. letta/server/rest_api/routers/v1/messages.py +14 -19
  54. letta/server/rest_api/routers/v1/runs.py +43 -28
  55. letta/server/rest_api/routers/v1/sources.py +25 -7
  56. letta/server/rest_api/routers/v1/tools.py +42 -0
  57. letta/server/rest_api/streaming_response.py +11 -2
  58. letta/server/server.py +9 -6
  59. letta/services/agent_manager.py +39 -59
  60. letta/services/agent_serialization_manager.py +26 -11
  61. letta/services/archive_manager.py +60 -9
  62. letta/services/block_manager.py +5 -0
  63. letta/services/file_processor/embedder/base_embedder.py +5 -0
  64. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  65. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  66. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  67. letta/services/file_processor/file_processor.py +9 -7
  68. letta/services/group_manager.py +74 -11
  69. letta/services/mcp_manager.py +134 -28
  70. letta/services/message_manager.py +229 -125
  71. letta/services/passage_manager.py +2 -1
  72. letta/services/source_manager.py +23 -1
  73. letta/services/summarizer/summarizer.py +4 -1
  74. letta/services/tool_executor/core_tool_executor.py +2 -120
  75. letta/services/tool_executor/files_tool_executor.py +133 -8
  76. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  77. letta/services/tool_sandbox/local_sandbox.py +2 -2
  78. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  79. letta/settings.py +6 -0
  80. letta/streaming_utils.py +29 -4
  81. letta/utils.py +106 -4
  82. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
  83. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
  84. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  85. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  86. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
letta/schemas/message.py CHANGED
@@ -1027,10 +1027,13 @@ class Message(BaseMessage):
1027
1027
  result = [m for m in result if m is not None]
1028
1028
  return result
1029
1029
 
1030
- def to_google_ai_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict:
1030
+ def to_google_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict | None:
1031
1031
  """
1032
1032
  Go from Message class to Google AI REST message object
1033
1033
  """
1034
+ if self.role == "approval" and self.tool_calls is None:
1035
+ return None
1036
+
1034
1037
  # type Content: https://ai.google.dev/api/rest/v1/Content / https://ai.google.dev/api/rest/v1beta/Content
1035
1038
  # parts[]: Part
1036
1039
  # role: str ('user' or 'model')
@@ -1076,7 +1079,7 @@ class Message(BaseMessage):
1076
1079
  "parts": content_parts,
1077
1080
  }
1078
1081
 
1079
- elif self.role == "assistant":
1082
+ elif self.role == "assistant" or self.role == "approval":
1080
1083
  assert self.tool_calls is not None or text_content is not None
1081
1084
  google_ai_message = {
1082
1085
  "role": "model", # NOTE: different
@@ -1164,6 +1167,20 @@ class Message(BaseMessage):
1164
1167
 
1165
1168
  return google_ai_message
1166
1169
 
1170
+ @staticmethod
1171
+ def to_google_dicts_from_list(
1172
+ messages: List[Message],
1173
+ put_inner_thoughts_in_kwargs: bool = True,
1174
+ ):
1175
+ result = [
1176
+ m.to_google_dict(
1177
+ put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
1178
+ )
1179
+ for m in messages
1180
+ ]
1181
+ result = [m for m in result if m is not None]
1182
+ return result
1183
+
1167
1184
  @staticmethod
1168
1185
  def generate_otid_from_id(message_id: str, index: int) -> str:
1169
1186
  """
@@ -1187,3 +1204,26 @@ class ToolReturn(BaseModel):
1187
1204
  stdout: Optional[List[str]] = Field(default=None, description="Captured stdout (e.g. prints, logs) from the tool invocation")
1188
1205
  stderr: Optional[List[str]] = Field(default=None, description="Captured stderr from the tool invocation")
1189
1206
  # func_return: Optional[Any] = Field(None, description="The function return object")
1207
+
1208
+
1209
+ class MessageSearchRequest(BaseModel):
1210
+ """Request model for searching messages across the organization"""
1211
+
1212
+ query: Optional[str] = Field(None, description="Text query for full-text search")
1213
+ search_mode: Literal["vector", "fts", "hybrid"] = Field("hybrid", description="Search mode to use")
1214
+ roles: Optional[List[MessageRole]] = Field(None, description="Filter messages by role")
1215
+ project_id: Optional[str] = Field(None, description="Filter messages by project ID")
1216
+ template_id: Optional[str] = Field(None, description="Filter messages by template ID")
1217
+ limit: int = Field(50, description="Maximum number of results to return", ge=1, le=100)
1218
+ start_date: Optional[datetime] = Field(None, description="Filter messages created after this date")
1219
+ end_date: Optional[datetime] = Field(None, description="Filter messages created on or before this date")
1220
+
1221
+
1222
+ class MessageSearchResult(BaseModel):
1223
+ """Result from a message search operation with scoring details."""
1224
+
1225
+ embedded_text: str = Field(..., description="The embedded content (LLM-friendly)")
1226
+ message: Message = Field(..., description="The raw message object")
1227
+ fts_rank: Optional[int] = Field(None, description="Full-text search rank position if FTS was used")
1228
+ vector_rank: Optional[int] = Field(None, description="Vector search rank position if vector search was used")
1229
+ rrf_score: float = Field(..., description="Reciprocal Rank Fusion combined score")
letta/schemas/providers/ollama.py CHANGED
@@ -3,7 +3,7 @@ from typing import Literal
3
3
  import aiohttp
4
4
  from pydantic import Field
5
5
 
6
- from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_DIM, OLLAMA_API_PREFIX
6
+ from letta.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_CHUNK_SIZE, DEFAULT_EMBEDDING_DIM, OLLAMA_API_PREFIX
7
7
  from letta.log import get_logger
8
8
  from letta.schemas.embedding_config import EmbeddingConfig
9
9
  from letta.schemas.enums import ProviderCategory, ProviderType
letta/schemas/providers.py CHANGED
@@ -777,7 +777,6 @@ class AnthropicProvider(Provider):
777
777
 
778
778
  configs = []
779
779
  for model in models:
780
-
781
780
  if model["type"] != "model":
782
781
  continue
783
782
 
@@ -1069,7 +1068,7 @@ class GroqProvider(OpenAIProvider):
1069
1068
  response = openai_get_model_list(self.base_url, api_key=self.api_key)
1070
1069
  configs = []
1071
1070
  for model in response["data"]:
1072
- if not "context_window" in model:
1071
+ if "context_window" not in model:
1073
1072
  continue
1074
1073
  configs.append(
1075
1074
  LLMConfig(
letta/schemas/source.py CHANGED
@@ -3,7 +3,9 @@ from typing import Optional
3
3
 
4
4
  from pydantic import Field
5
5
 
6
+ from letta.helpers.tpuf_client import should_use_tpuf
6
7
  from letta.schemas.embedding_config import EmbeddingConfig
8
+ from letta.schemas.enums import VectorDBProvider
7
9
  from letta.schemas.letta_base import LettaBase
8
10
 
9
11
 
@@ -40,6 +42,10 @@ class Source(BaseSource):
40
42
  metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="Metadata associated with the source.")
41
43
 
42
44
  # metadata fields
45
+ vector_db_provider: VectorDBProvider = Field(
46
+ default=VectorDBProvider.NATIVE,
47
+ description="The vector database provider used for this source's passages",
48
+ )
43
49
  created_by_id: Optional[str] = Field(None, description="The id of the user that made this Tool.")
44
50
  last_updated_by_id: Optional[str] = Field(None, description="The id of the user that made this Tool.")
45
51
  created_at: Optional[datetime] = Field(None, description="The timestamp when the source was created.")
letta/schemas/step_metrics.py CHANGED
@@ -15,6 +15,8 @@ class StepMetrics(StepMetricsBase):
15
15
  provider_id: Optional[str] = Field(None, description="The unique identifier of the provider.")
16
16
  job_id: Optional[str] = Field(None, description="The unique identifier of the job.")
17
17
  agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
18
+ step_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the step in nanoseconds.")
19
+ llm_request_start_ns: Optional[int] = Field(None, description="The timestamp of the start of the llm request in nanoseconds.")
18
20
  llm_request_ns: Optional[int] = Field(None, description="Time spent on LLM requests in nanoseconds.")
19
21
  tool_execution_ns: Optional[int] = Field(None, description="Time spent on tool execution in nanoseconds.")
20
22
  step_ns: Optional[int] = Field(None, description="Total time for the step in nanoseconds.")
letta/server/rest_api/interface.py CHANGED
@@ -295,6 +295,25 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
295
295
  self.optimistic_json_parser = OptimisticJSONParser()
296
296
  self.current_json_parse_result = {}
297
297
 
298
+ # NOTE (fix): OpenAI deltas may split a key and its value across chunks
299
+ # (e.g. '"request_heartbeat"' in one chunk, ': true' in the next). The
300
+ # old behavior passed through each fragment verbatim, which could emit
301
+ # a bare key (or a key+opening quote) without its value, producing
302
+ # invalid JSON slices and the "missing end-quote" symptom downstream.
303
+ #
304
+ # To make streamed arguments robust, we add a JSON-aware incremental
305
+ # reader that only releases safe updates for the "main" JSON portion of
306
+ # the tool_call arguments. This prevents partial-key emissions while
307
+ # preserving incremental streaming for consumers.
308
+ #
309
+ # We still stream 'name' fragments as-is (safe), but 'arguments' are
310
+ # parsed incrementally and emitted only when a boundary is safe.
311
+ self._raw_args_reader = JSONInnerThoughtsExtractor(
312
+ inner_thoughts_key=inner_thoughts_kwarg,
313
+ wait_for_first_key=False,
314
+ )
315
+ self._raw_args_tool_call_id = None
316
+
298
317
  # Store metadata passed from server
299
318
  self.metadata = {}
300
319
 
@@ -654,11 +673,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
654
673
  tool_call_delta = {}
655
674
  if tool_call.id:
656
675
  tool_call_delta["id"] = tool_call.id
676
+ # Reset raw args reader per tool_call id
677
+ if self._raw_args_tool_call_id != tool_call.id:
678
+ self._raw_args_tool_call_id = tool_call.id
679
+ self._raw_args_reader = JSONInnerThoughtsExtractor(
680
+ inner_thoughts_key=self.inner_thoughts_kwarg,
681
+ wait_for_first_key=False,
682
+ )
657
683
  if tool_call.function:
658
- if tool_call.function.arguments:
659
- tool_call_delta["arguments"] = tool_call.function.arguments
684
+ # Stream name fragments as-is (names are short and harmless to emit)
660
685
  if tool_call.function.name:
661
686
  tool_call_delta["name"] = tool_call.function.name
687
+ # For arguments, incrementally parse to avoid emitting partial keys
688
+ if tool_call.function.arguments:
689
+ self.current_function_arguments += tool_call.function.arguments
690
+ updates_main_json, _ = self._raw_args_reader.process_fragment(tool_call.function.arguments)
691
+ # Only emit argument updates when a safe boundary is reached
692
+ if updates_main_json:
693
+ tool_call_delta["arguments"] = updates_main_json
662
694
 
663
695
  # We might end up with a no-op, in which case we should omit
664
696
  if (
letta/server/rest_api/json_parser.py CHANGED
@@ -63,6 +63,8 @@ class OptimisticJSONParser(JSONParser):
63
63
  '"': self._parse_string,
64
64
  "t": self._parse_true,
65
65
  "f": self._parse_false,
66
+ "T": self._parse_true,
67
+ "F": self._parse_false,
66
68
  "n": self._parse_null,
67
69
  }
68
70
  # Register number parser for digits and signs
letta/server/rest_api/redis_stream_manager.py CHANGED
@@ -8,6 +8,7 @@ from typing import AsyncIterator, Dict, List, Optional
8
8
 
9
9
  from letta.data_sources.redis_client import AsyncRedisClient
10
10
  from letta.log import get_logger
11
+ from letta.utils import safe_create_task
11
12
 
12
13
  logger = get_logger(__name__)
13
14
 
@@ -62,7 +63,7 @@ class RedisSSEStreamWriter:
62
63
  """Start the background flush task."""
63
64
  if not self._running:
64
65
  self._running = True
65
- self._flush_task = asyncio.create_task(self._periodic_flush())
66
+ self._flush_task = safe_create_task(self._periodic_flush(), label="redis_periodic_flush")
66
67
 
67
68
  async def stop(self):
68
69
  """Stop the background flush task and flush remaining data."""
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py CHANGED
@@ -14,6 +14,7 @@ from letta.server.rest_api.chat_completions_interface import ChatCompletionsStre
14
14
 
15
15
  # TODO this belongs in a controller!
16
16
  from letta.server.rest_api.utils import get_letta_server, get_user_message_from_chat_completions_request, sse_async_generator
17
+ from letta.utils import safe_create_task
17
18
 
18
19
  if TYPE_CHECKING:
19
20
  from letta.server.server import SyncServer
@@ -98,7 +99,7 @@ async def send_message_to_agent_chat_completions(
98
99
 
99
100
  # Offload the synchronous message_func to a separate thread
100
101
  streaming_interface.stream_start()
101
- asyncio.create_task(
102
+ safe_create_task(
102
103
  asyncio.to_thread(
103
104
  server.send_messages,
104
105
  actor=actor,
@@ -106,7 +107,8 @@ async def send_message_to_agent_chat_completions(
106
107
  input_messages=messages,
107
108
  interface=streaming_interface,
108
109
  put_inner_thoughts_first=False,
109
- )
110
+ ),
111
+ label="openai_send_messages",
110
112
  )
111
113
 
112
114
  # return a stream
letta/server/rest_api/routers/v1/__init__.py CHANGED
@@ -5,6 +5,7 @@ from letta.server.rest_api.routers.v1.folders import router as folders_router
5
5
  from letta.server.rest_api.routers.v1.groups import router as groups_router
6
6
  from letta.server.rest_api.routers.v1.health import router as health_router
7
7
  from letta.server.rest_api.routers.v1.identities import router as identities_router
8
+ from letta.server.rest_api.routers.v1.internal_templates import router as internal_templates_router
8
9
  from letta.server.rest_api.routers.v1.jobs import router as jobs_router
9
10
  from letta.server.rest_api.routers.v1.llms import router as llm_router
10
11
  from letta.server.rest_api.routers.v1.messages import router as messages_router
@@ -25,6 +26,7 @@ ROUTERS = [
25
26
  agents_router,
26
27
  groups_router,
27
28
  identities_router,
29
+ internal_templates_router,
28
30
  llm_router,
29
31
  blocks_router,
30
32
  jobs_router,