letta-nightly 0.7.29.dev20250602104315__py3-none-any.whl → 0.8.0.dev20250604104349__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- letta/__init__.py +7 -1
- letta/agent.py +16 -9
- letta/agents/base_agent.py +1 -0
- letta/agents/ephemeral_summary_agent.py +104 -0
- letta/agents/helpers.py +35 -3
- letta/agents/letta_agent.py +492 -176
- letta/agents/letta_agent_batch.py +22 -16
- letta/agents/prompts/summary_system_prompt.txt +62 -0
- letta/agents/voice_agent.py +22 -7
- letta/agents/voice_sleeptime_agent.py +13 -8
- letta/constants.py +33 -1
- letta/data_sources/connectors.py +52 -36
- letta/errors.py +4 -0
- letta/functions/ast_parsers.py +13 -30
- letta/functions/function_sets/base.py +3 -1
- letta/functions/functions.py +2 -0
- letta/functions/mcp_client/base_client.py +151 -97
- letta/functions/mcp_client/sse_client.py +49 -31
- letta/functions/mcp_client/stdio_client.py +107 -106
- letta/functions/schema_generator.py +22 -22
- letta/groups/helpers.py +3 -4
- letta/groups/sleeptime_multi_agent.py +4 -4
- letta/groups/sleeptime_multi_agent_v2.py +22 -0
- letta/helpers/composio_helpers.py +16 -0
- letta/helpers/converters.py +20 -0
- letta/helpers/datetime_helpers.py +1 -6
- letta/helpers/tool_rule_solver.py +2 -1
- letta/interfaces/anthropic_streaming_interface.py +17 -2
- letta/interfaces/openai_chat_completions_streaming_interface.py +1 -0
- letta/interfaces/openai_streaming_interface.py +18 -2
- letta/jobs/llm_batch_job_polling.py +1 -1
- letta/jobs/scheduler.py +1 -1
- letta/llm_api/anthropic_client.py +24 -3
- letta/llm_api/google_ai_client.py +0 -15
- letta/llm_api/google_vertex_client.py +6 -5
- letta/llm_api/llm_client_base.py +15 -0
- letta/llm_api/openai.py +2 -2
- letta/llm_api/openai_client.py +60 -8
- letta/orm/__init__.py +2 -0
- letta/orm/agent.py +45 -43
- letta/orm/base.py +0 -2
- letta/orm/block.py +1 -0
- letta/orm/custom_columns.py +13 -0
- letta/orm/enums.py +5 -0
- letta/orm/file.py +3 -1
- letta/orm/files_agents.py +68 -0
- letta/orm/mcp_server.py +48 -0
- letta/orm/message.py +1 -0
- letta/orm/organization.py +11 -2
- letta/orm/passage.py +25 -10
- letta/orm/sandbox_config.py +5 -2
- letta/orm/sqlalchemy_base.py +171 -110
- letta/prompts/system/memgpt_base.txt +6 -1
- letta/prompts/system/memgpt_v2_chat.txt +57 -0
- letta/prompts/system/sleeptime.txt +2 -0
- letta/prompts/system/sleeptime_v2.txt +28 -0
- letta/schemas/agent.py +87 -20
- letta/schemas/block.py +7 -1
- letta/schemas/file.py +57 -0
- letta/schemas/mcp.py +74 -0
- letta/schemas/memory.py +5 -2
- letta/schemas/message.py +9 -0
- letta/schemas/openai/openai.py +0 -6
- letta/schemas/providers.py +33 -4
- letta/schemas/tool.py +26 -21
- letta/schemas/tool_execution_result.py +5 -0
- letta/server/db.py +23 -8
- letta/server/rest_api/app.py +73 -56
- letta/server/rest_api/interface.py +4 -4
- letta/server/rest_api/routers/v1/agents.py +132 -47
- letta/server/rest_api/routers/v1/blocks.py +3 -2
- letta/server/rest_api/routers/v1/embeddings.py +3 -3
- letta/server/rest_api/routers/v1/groups.py +3 -3
- letta/server/rest_api/routers/v1/jobs.py +14 -17
- letta/server/rest_api/routers/v1/organizations.py +10 -10
- letta/server/rest_api/routers/v1/providers.py +12 -10
- letta/server/rest_api/routers/v1/runs.py +3 -3
- letta/server/rest_api/routers/v1/sandbox_configs.py +12 -12
- letta/server/rest_api/routers/v1/sources.py +108 -43
- letta/server/rest_api/routers/v1/steps.py +8 -6
- letta/server/rest_api/routers/v1/tools.py +134 -95
- letta/server/rest_api/utils.py +12 -1
- letta/server/server.py +272 -73
- letta/services/agent_manager.py +246 -313
- letta/services/block_manager.py +30 -9
- letta/services/context_window_calculator/__init__.py +0 -0
- letta/services/context_window_calculator/context_window_calculator.py +150 -0
- letta/services/context_window_calculator/token_counter.py +82 -0
- letta/services/file_processor/__init__.py +0 -0
- letta/services/file_processor/chunker/__init__.py +0 -0
- letta/services/file_processor/chunker/llama_index_chunker.py +29 -0
- letta/services/file_processor/embedder/__init__.py +0 -0
- letta/services/file_processor/embedder/openai_embedder.py +84 -0
- letta/services/file_processor/file_processor.py +123 -0
- letta/services/file_processor/parser/__init__.py +0 -0
- letta/services/file_processor/parser/base_parser.py +9 -0
- letta/services/file_processor/parser/mistral_parser.py +54 -0
- letta/services/file_processor/types.py +0 -0
- letta/services/files_agents_manager.py +184 -0
- letta/services/group_manager.py +118 -0
- letta/services/helpers/agent_manager_helper.py +76 -21
- letta/services/helpers/tool_execution_helper.py +3 -0
- letta/services/helpers/tool_parser_helper.py +100 -0
- letta/services/identity_manager.py +44 -42
- letta/services/job_manager.py +21 -10
- letta/services/mcp/base_client.py +5 -2
- letta/services/mcp/sse_client.py +3 -5
- letta/services/mcp/stdio_client.py +3 -5
- letta/services/mcp_manager.py +281 -0
- letta/services/message_manager.py +40 -26
- letta/services/organization_manager.py +55 -19
- letta/services/passage_manager.py +211 -13
- letta/services/provider_manager.py +48 -2
- letta/services/sandbox_config_manager.py +105 -0
- letta/services/source_manager.py +4 -5
- letta/services/step_manager.py +9 -6
- letta/services/summarizer/summarizer.py +50 -23
- letta/services/telemetry_manager.py +7 -0
- letta/services/tool_executor/tool_execution_manager.py +11 -52
- letta/services/tool_executor/tool_execution_sandbox.py +4 -34
- letta/services/tool_executor/tool_executor.py +107 -105
- letta/services/tool_manager.py +56 -17
- letta/services/tool_sandbox/base.py +39 -92
- letta/services/tool_sandbox/e2b_sandbox.py +16 -11
- letta/services/tool_sandbox/local_sandbox.py +51 -23
- letta/services/user_manager.py +36 -3
- letta/settings.py +10 -3
- letta/templates/__init__.py +0 -0
- letta/templates/sandbox_code_file.py.j2 +47 -0
- letta/templates/template_helper.py +16 -0
- letta/tracing.py +30 -1
- letta/types/__init__.py +7 -0
- letta/utils.py +25 -1
- {letta_nightly-0.7.29.dev20250602104315.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/METADATA +7 -2
- {letta_nightly-0.7.29.dev20250602104315.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/RECORD +138 -112
- {letta_nightly-0.7.29.dev20250602104315.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.29.dev20250602104315.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.29.dev20250602104315.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/entry_points.txt +0 -0
letta/agents/letta_agent_batch.py
CHANGED
@@ -27,6 +27,7 @@ from letta.schemas.llm_batch_job import LLMBatchItem
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import ToolCall as OpenAIToolCall
 from letta.schemas.sandbox_config import SandboxConfig, SandboxType
+from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
 from letta.server.rest_api.utils import create_heartbeat_system_message, create_letta_messages_from_llm_response
 from letta.services.agent_manager import AgentManager
@@ -66,15 +67,17 @@ class _ResumeContext:
     request_status_updates: List[RequestStatusUpdateInfo]


-async def execute_tool_wrapper(params: ToolExecutionParams) -> Tuple[str, Tuple[str, bool]]:
+async def execute_tool_wrapper(params: ToolExecutionParams) -> tuple[str, ToolExecutionResult]:
     """
     Executes the tool in an out‑of‑process worker and returns:
         (agent_id, (tool_result:str, success_flag:bool))
     """
+    from letta.schemas.tool_execution_result import ToolExecutionResult
+
     # locate the tool on the agent
     target_tool = next((t for t in params.agent_state.tools if t.name == params.tool_call_name), None)
     if not target_tool:
-        return params.agent_id, (f"Tool not found: {params.tool_call_name}", False)
+        return params.agent_id, ToolExecutionResult(func_return=f"Tool not found: {params.tool_call_name}", status="error")

     try:
         mgr = ToolExecutionManager(
@@ -88,9 +91,9 @@ async def execute_tool_wrapper(params: ToolExecutionParams) -> Tuple[str, Tuple[str, bool]]:
             function_args=params.tool_args,
             tool=target_tool,
         )
-        return params.agent_id, (tool_execution_result.func_return, tool_execution_result.success_flag)
+        return params.agent_id, tool_execution_result
     except Exception as e:
-        return params.agent_id, (f"Failed to call tool. Error: {e}", False)
+        return params.agent_id, ToolExecutionResult(func_return=f"Failed to call tool. Error: {e}", status="error")


 # TODO: Limitations ->
@@ -245,7 +248,7 @@ class LettaAgentBatch(BaseAgent):
         await self._mark_steps_complete_async(llm_batch_id, ctx.agent_ids)

         log_event(name="prepare_next")
-        next_reqs, next_step_state = self._prepare_next_iteration(exec_results, ctx, msg_map)
+        next_reqs, next_step_state = await self._prepare_next_iteration_async(exec_results, ctx, msg_map)
         if len(next_reqs) == 0:
             await self.job_manager.update_job_by_id_async(
                 job_id=letta_batch_id, job_update=JobUpdate(status=JobStatus.completed), actor=self.actor
@@ -393,7 +396,7 @@ class LettaAgentBatch(BaseAgent):
         return cfg, env

     @trace_method
-    async def _execute_tools(self, ctx: _ResumeContext) -> Sequence[Tuple[str, Tuple[str, bool]]]:
+    async def _execute_tools(self, ctx: _ResumeContext) -> Sequence[tuple[str, ToolExecutionResult]]:
         sbx_cfg, sbx_env = await self._build_sandbox()
         rethink_memory_tool_name = "rethink_memory"
         tool_params = []
@@ -424,7 +427,7 @@ class LettaAgentBatch(BaseAgent):
             return await pool.map(execute_tool_wrapper, tool_params)

     @trace_method
-    async def _bulk_rethink_memory_async(self, params: List[ToolExecutionParams]) -> Sequence[Tuple[str, Tuple[str, bool]]]:
+    async def _bulk_rethink_memory_async(self, params: List[ToolExecutionParams]) -> Sequence[tuple[str, ToolExecutionResult]]:
         updates = {}
         result = []
         for param in params:
@@ -443,7 +446,7 @@ class LettaAgentBatch(BaseAgent):
                 updates[block_id] = new_value

             # TODO: This is quite ugly and confusing - this is mostly to align with the returns of other tools
-            result.append((param.agent_id, ("", True)))
+            result.append((param.agent_id, ToolExecutionResult(status="success")))

         await self.block_manager.bulk_update_block_values_async(updates=updates, actor=self.actor)

@@ -451,7 +454,7 @@ class LettaAgentBatch(BaseAgent):

     async def _persist_tool_messages(
         self,
-        exec_results: Sequence[Tuple[str, Tuple[str, bool]]],
+        exec_results: Sequence[Tuple[str, "ToolExecutionResult"]],
         ctx: _ResumeContext,
     ) -> Dict[str, List[Message]]:
         # TODO: This is redundant, we should have this ready on the ctx
@@ -459,14 +462,15 @@ class LettaAgentBatch(BaseAgent):
         agent_item_map: Dict[str, LLMBatchItem] = {item.agent_id: item for item in ctx.batch_items}

         msg_map: Dict[str, List[Message]] = {}
-        for aid, (tool_result, success_flag) in exec_results:
+        for aid, tool_exec_result in exec_results:
             msgs = self._create_tool_call_messages(
                 llm_batch_item_id=agent_item_map[aid].id,
                 agent_state=ctx.agent_state_map[aid],
                 tool_call_name=ctx.tool_call_name_map[aid],
                 tool_call_args=ctx.tool_call_args_map[aid],
-                tool_exec_result=tool_result,
-                success_flag=success_flag,
+                tool_exec_result=tool_exec_result.func_return,
+                success_flag=tool_exec_result.success_flag,
+                tool_exec_result_obj=tool_exec_result,
                 reasoning_content=None,
             )
             msg_map[aid] = msgs
@@ -480,16 +484,16 @@ class LettaAgentBatch(BaseAgent):
         ]
         await self.batch_manager.bulk_update_llm_batch_items_step_status_by_agent_async(updates)

-    def _prepare_next_iteration(
+    async def _prepare_next_iteration_async(
         self,
-        exec_results: Sequence[Tuple[str, Tuple[str, bool]]],
+        exec_results: Sequence[Tuple[str, "ToolExecutionResult"]],
         ctx: _ResumeContext,
         msg_map: Dict[str, List[Message]],
     ) -> Tuple[List[LettaBatchRequest], Dict[str, AgentStepState]]:
         # who continues?
         continues = [aid for aid, cont in ctx.should_continue_map.items() if cont]

-        success_flag_map = {aid: success_flag for aid, (_, success_flag) in exec_results}
+        success_flag_map = {aid: result.success_flag for aid, result in exec_results}

         batch_reqs: List[LettaBatchRequest] = []
         for aid in continues:
@@ -509,7 +513,7 @@ class LettaAgentBatch(BaseAgent):
         for aid, new_msgs in msg_map.items():
             ast = ctx.agent_state_map[aid]
             if not ast.message_buffer_autoclear:
-                self.agent_manager.set_in_context_messages(
+                await self.agent_manager.set_in_context_messages_async(
                     agent_id=aid,
                     message_ids=ast.message_ids + [m.id for m in new_msgs],
                     actor=self.actor,
@@ -528,6 +532,7 @@ class LettaAgentBatch(BaseAgent):
         tool_call_name: str,
         tool_call_args: Dict[str, Any],
         tool_exec_result: str,
+        tool_exec_result_obj: "ToolExecutionResult",
         success_flag: bool,
         reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
     ) -> List[Message]:
@@ -541,6 +546,7 @@ class LettaAgentBatch(BaseAgent):
             tool_call_id=tool_call_id,
             function_call_success=success_flag,
             function_response=tool_exec_result,
+            tool_execution_result=tool_exec_result_obj,
             actor=self.actor,
             add_heartbeat_request_system_message=False,
             reasoning_content=reasoning_content,
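The thread running through the letta_agent_batch.py changes above is the replacement of ad-hoc `(tool_result, success_flag)` tuples with a `ToolExecutionResult` object. The schema file itself only gains 5 lines in this release (`letta/schemas/tool_execution_result.py`) and its definition is not shown here, so the following is a minimal sketch of the interface these hunks rely on, inferred purely from the call sites: a `status` string, a `func_return` payload, and a derived `success_flag`. The real class may carry additional fields.

```python
# Minimal sketch inferred from the call sites in this diff, not the actual
# letta/schemas/tool_execution_result.py source.
from typing import Any, Literal, Optional

from pydantic import BaseModel  # assumption: Letta schemas are Pydantic models


class ToolExecutionResult(BaseModel):
    status: Literal["success", "error"]  # call sites pass status="success" / status="error"
    func_return: Optional[Any] = None    # tool return value, or an error string

    @property
    def success_flag(self) -> bool:
        # call sites read .success_flag where they previously unpacked a bare bool
        return self.status == "success"
```

With that shape, `_persist_tool_messages` can keep feeding the legacy `tool_exec_result`/`success_flag` keyword arguments while also passing the whole object through the new `tool_exec_result_obj` parameter.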
letta/agents/prompts/summary_system_prompt.txt
ADDED
@@ -0,0 +1,62 @@
+You are a memory-recall assistant that preserves conversational context as messages exit the AI's context window.
+
+<core_function>
+Extract and preserve information that would be lost when messages are evicted, enabling continuity across conversations.
+</core_function>
+
+<detail_adaptation>
+Analyze content type and apply appropriate detail level:
+
+<high_detail>
+Apply to: episodic content, code, artifacts, documents, technical discussions
+- Capture specific facts, sequences, and technical details
+- Preserve exact names, dates, numbers, specifications
+- Document code snippets, artifact IDs, document structures
+- Note precise steps in procedures or narratives
+- Include verbatim quotes for critical commitments
+</high_detail>
+
+<medium_detail>
+Apply to: ongoing projects, established preferences, multi-message threads
+- Summarize key decisions, milestones, progress
+- Record personal preferences and patterns
+- Track commitments and action items
+- Maintain project context and dependencies
+</medium_detail>
+
+<low_detail>
+Apply to: high-level discussions, philosophical topics, general preferences
+- Capture main themes and conclusions
+- Note relationship dynamics and communication style
+- Summarize positions and general goals
+- Record broad aspirations
+</low_detail>
+</detail_adaptation>
+
+<information_priority>
+<critical>Commitments, deadlines, medical/legal information, explicit requests</critical>
+<important>Personal details, project status, technical specifications, decisions</important>
+<contextual>Preferences, opinions, relationship dynamics, emotional tone</contextual>
+<background>General topics, themes, conversational patterns</background>
+</information_priority>
+
+<format_rules>
+- Use bullet points for discrete facts
+- Write prose for narratives or complex relationships
+- **Bold** key terms and identifiers
+- Include temporal markers: [ongoing], [mentioned DATE], [since TIME]
+- Group under clear headers when multiple topics present
+- Use consistent terminology for searchability
+</format_rules>
+
+<exclusions>
+- Information in remaining context
+- Generic pleasantries
+- Inferrable details
+- Redundant restatements
+- Conversational filler
+</exclusions>
+
+<critical_reminder>
+Your notes are the sole record of evicted messages. Every word should enable future continuity.
+</critical_reminder>
letta/agents/voice_agent.py
CHANGED
@@ -1,7 +1,7 @@
 import json
 import uuid
 from datetime import datetime, timedelta, timezone
-from typing import Any, AsyncGenerator, Dict, List, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional

 import openai

@@ -118,6 +118,7 @@ class VoiceAgent(BaseAgent):
         Main streaming loop that yields partial tokens.
         Whenever we detect a tool call, we yield from _handle_ai_response as well.
         """
+        print("CALL STREAM")
         if len(input_messages) != 1 or input_messages[0].role != MessageRole.user:
             raise ValueError(f"Voice Agent was invoked with multiple input messages or message did not have role `user`: {input_messages}")

@@ -238,14 +239,17 @@ class VoiceAgent(BaseAgent):
         )
         in_memory_message_history.append(assistant_tool_call_msg.model_dump())

-        tool_result, success_flag = await self._execute_tool(
+        tool_execution_result = await self._execute_tool(
             user_query=user_query,
             tool_name=tool_call_name,
             tool_args=tool_args,
             agent_state=agent_state,
         )
+        tool_result = tool_execution_result.func_return
+        success_flag = tool_execution_result.success_flag

         # 3. Provide function_call response back into the conversation
+        # TODO: fix this tool format
         tool_message = ToolMessage(
             content=json.dumps({"result": tool_result}),
             tool_call_id=tool_call_id,
@@ -267,6 +271,7 @@ class VoiceAgent(BaseAgent):
             tool_call_id=tool_call_id,
             function_call_success=success_flag,
             function_response=tool_result,
+            tool_execution_result=tool_execution_result,
             actor=self.actor,
             add_heartbeat_request_system_message=True,
         )
@@ -388,10 +393,14 @@ class VoiceAgent(BaseAgent):
             for t in tools
         ]

-    async def _execute_tool(self, user_query: str, tool_name: str, tool_args: dict, agent_state: AgentState) -> Tuple[str, bool]:
+    async def _execute_tool(self, user_query: str, tool_name: str, tool_args: dict, agent_state: AgentState) -> "ToolExecutionResult":
         """
         Executes a tool and returns (result, success_flag).
         """
+        from letta.schemas.tool_execution_result import ToolExecutionResult
+
+        print("EXECUTING TOOL")
+
         # Special memory case
         if tool_name == "search_memory":
             tool_result = await self._search_memory(
@@ -401,11 +410,17 @@ class VoiceAgent(BaseAgent):
                 end_minutes_ago=tool_args["end_minutes_ago"],
                 agent_state=agent_state,
             )
-            return tool_result, True
+            return ToolExecutionResult(
+                func_return=tool_result,
+                status="success",
+            )
         else:
             target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
             if not target_tool:
-                return f"Tool not found: {tool_name}", False
+                return ToolExecutionResult(
+                    func_return=f"Tool not found: {tool_name}",
+                    status="error",
+                )

         try:
             tool_result, _ = execute_external_tool(
@@ -416,9 +431,9 @@ class VoiceAgent(BaseAgent):
                 actor=self.actor,
                 allow_agent_state_modifications=False,
             )
-            return tool_result, True
+            return ToolExecutionResult(func_return=tool_result, status="success")
         except Exception as e:
-            return f"Failed to call tool. Error: {e}", False
+            return ToolExecutionResult(func_return=f"Failed to call tool. Error: {e}", status="error")

     async def _search_memory(
         self,
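Note the new `# TODO: fix this tool format` comment above: the tool result is handed back to the model wrapped in a `{"result": ...}` JSON envelope, which the comment marks as provisional. Expressed as a plain OpenAI-style chat message (dicts instead of the package's `ToolMessage` class, with hypothetical values), the message being appended is:

```python
import json

# hypothetical stand-ins for the streaming loop's state
tool_call_id = "call_abc123"
tool_result = "Found 3 matching memories."

# the voice agent appends a `tool` role message whose content wraps
# the raw result in a {"result": ...} envelope
tool_message = {
    "role": "tool",
    "tool_call_id": tool_call_id,
    "content": json.dumps({"result": tool_result}),
}
```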
letta/agents/voice_sleeptime_agent.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import AsyncGenerator, List, Tuple, Union
+from typing import AsyncGenerator, List, Optional, Tuple, Union

 from letta.agents.helpers import _create_letta_response, serialize_message_history
 from letta.agents.letta_agent import LettaAgent
@@ -89,20 +89,23 @@ class VoiceSleeptimeAgent(LettaAgent):
         )

     @trace_method
-    async def _execute_tool(self, tool_name: str, tool_args: dict, agent_state: AgentState):
+    async def _execute_tool(self, tool_name: str, tool_args: dict, agent_state: AgentState, agent_step_span: Optional["Span"] = None):
         """
         Executes a tool and returns (result, success_flag).
         """
+        from letta.schemas.tool_execution_result import ToolExecutionResult
+
         # Special memory case
         target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
         if not target_tool:
-            return f"Tool not found: {tool_name}"
+            return ToolExecutionResult(status="error", func_return=f"Tool not found: {tool_name}")

         try:
             if target_tool.name == "rethink_user_memory" and target_tool.tool_type == ToolType.LETTA_VOICE_SLEEPTIME_CORE:
-                return self.rethink_user_memory(agent_state=agent_state, **tool_args)
+                func_return, success_flag = self.rethink_user_memory(agent_state=agent_state, **tool_args)
+                return ToolExecutionResult(func_return=func_return, status="success" if success_flag else "error")
             elif target_tool.name == "finish_rethinking_memory" and target_tool.tool_type == ToolType.LETTA_VOICE_SLEEPTIME_CORE:
-                return "", True
+                return ToolExecutionResult(func_return="", status="success")
             elif target_tool.name == "store_memories" and target_tool.tool_type == ToolType.LETTA_VOICE_SLEEPTIME_CORE:
                 chunks = tool_args.get("chunks", [])
                 results = [self.store_memory(agent_state=self.convo_agent_state, **chunk_args) for chunk_args in chunks]
@@ -110,12 +113,14 @@ class VoiceSleeptimeAgent(LettaAgent):
                 aggregated_result = next((res for res, _ in results if res is not None), None)
                 aggregated_success = all(success for _, success in results)

-                return aggregated_result, aggregated_success
+                return ToolExecutionResult(
+                    func_return=aggregated_result, status="success" if aggregated_success else "error"
+                )  # Note that here we store to the convo agent's archival memory
             else:
                 result = f"Voice sleeptime agent tried invoking invalid tool with type {target_tool.tool_type}: {target_tool}"
-                return result, False
+                return ToolExecutionResult(func_return=result, status="error")
         except Exception as e:
-            return f"Failed to call tool. Error: {e}", False
+            return ToolExecutionResult(func_return=f"Failed to call tool. Error: {e}", status="error")

     def rethink_user_memory(self, new_memory: str, agent_state: AgentState) -> Tuple[str, bool]:
         if agent_state.memory.get_block(self.target_block_label) is None:
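The sleeptime agent's internal helpers (`rethink_user_memory`, `store_memory`) still return `(func_return, success_flag)` tuples, and `_execute_tool` now converts them at the boundary. The conversion is uniform enough to express as a small adapter, sketched here against the `ToolExecutionResult` shape assumed earlier:

```python
def to_execution_result(func_return, success_flag: bool) -> "ToolExecutionResult":
    # mirrors the pattern above: a boolean success flag becomes a status string
    return ToolExecutionResult(
        func_return=func_return,
        status="success" if success_flag else "error",
    )
```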
letta/constants.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import re
 from logging import CRITICAL, DEBUG, ERROR, INFO, NOTSET, WARN, WARNING

 LETTA_DIR = os.path.join(os.path.expanduser("~"), ".letta")
@@ -36,6 +37,9 @@ TOOL_CALL_ID_MAX_LEN = 29
 # minimum context window size
 MIN_CONTEXT_WINDOW = 4096

+# number of concurrent embedding requests to sent
+EMBEDDING_BATCH_SIZE = 200
+
 # Voice Sleeptime message buffer lengths
 DEFAULT_MAX_MESSAGE_BUFFER_LENGTH = 30
 DEFAULT_MIN_MESSAGE_BUFFER_LENGTH = 15
@@ -56,12 +60,23 @@ DEFAULT_PERSONA = "sam_pov"
 DEFAULT_HUMAN = "basic"
 DEFAULT_PRESET = "memgpt_chat"

+DEFAULT_PERSONA_BLOCK_DESCRIPTION = "The persona block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions."
+DEFAULT_HUMAN_BLOCK_DESCRIPTION = "The human block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation."
+
 SEND_MESSAGE_TOOL_NAME = "send_message"
 # Base tools that cannot be edited, as they access agent state directly
 # Note that we don't include "conversation_search_date" for now
 BASE_TOOLS = [SEND_MESSAGE_TOOL_NAME, "conversation_search", "archival_memory_insert", "archival_memory_search"]
 # Base memory tools CAN be edited, and are added by default by the server
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
+# New v2 collection of the base memory tools (effecitvely same as sleeptime set), to pair with memgpt_v2 prompt
+BASE_MEMORY_TOOLS_V2 = [
+    "memory_replace",
+    "memory_insert",
+    # NOTE: leaving these ones out to simply the set? Can have these reserved for sleep-time
+    # "memory_rethink",
+    # "memory_finish_edits",
+]
 # Base tools if the memgpt agent has enable_sleeptime on
 BASE_SLEEPTIME_CHAT_TOOLS = [SEND_MESSAGE_TOOL_NAME, "conversation_search", "archival_memory_search"]
 # Base memory tools for sleeptime agent
@@ -85,6 +100,15 @@ BASE_VOICE_SLEEPTIME_TOOLS = [
 # Multi agent tools
 MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags", "send_message_to_agent_async"]

+# Used to catch if line numbers are pushed in
+# MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(r"^Line \d+: ", re.MULTILINE)
+# More "robust" version that handles different kinds of whitespace
+# shared constant for both memory_insert and memory_replace
+MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
+    r"^[ \t]*Line[ \t]+\d+[ \t]*:",  # allow any leading whitespace and flexible spacing
+    re.MULTILINE,
+)
+
 # Built in tools
 BUILTIN_TOOLS = ["run_code", "web_search"]

@@ -99,6 +123,13 @@ LETTA_TOOL_SET = set(
     + BUILTIN_TOOLS
 )

+
+def FUNCTION_RETURN_VALUE_TRUNCATED(return_str, return_char: int, return_char_limit: int):
+    return (
+        f"{return_str}... [NOTE: function output was truncated since it exceeded the character limit: {return_char} > {return_char_limit}]"
+    )
+
+
 # The name of the tool used to send message to the user
 # May not be relevant in cases where the agent has multiple ways to message to user (send_imessage, send_discord_mesasge, ...)
 # or in cases where the agent has no concept of messaging a user (e.g. a workflow agent)
@@ -108,6 +139,7 @@ DEFAULT_MESSAGE_TOOL_KWARG = "message"
 PRE_EXECUTION_MESSAGE_ARG = "pre_exec_msg"

 REQUEST_HEARTBEAT_PARAM = "request_heartbeat"
+REQUEST_HEARTBEAT_DESCRIPTION = "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function."


 # Structured output models
@@ -258,7 +290,7 @@ MAX_ERROR_MESSAGE_CHAR_LIMIT = 500
 CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 5000
 CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 5000
 CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 5000
-
+CORE_MEMORY_SOURCE_CHAR_LIMIT: int = 5000
 # Function return limits
 FUNCTION_RETURN_CHAR_LIMIT = 6000  # ~300 words
 BASE_FUNCTION_RETURN_CHAR_LIMIT = 1000000  # very high (we rely on implementation)
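Two of the additions above are small executable helpers rather than plain constants. `MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX` exists to catch `Line N:` prefixes if an agent echoes rendered line numbers back into `memory_insert`/`memory_replace` arguments. A quick demonstration of how the published pattern behaves (the sample text is invented, and exactly where the tools apply the substitution is not shown in this diff):

```python
import re

MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
    r"^[ \t]*Line[ \t]+\d+[ \t]*:",
    re.MULTILINE,
)

echoed = "Line 1: The user's name is Sam.\n\tLine 2 : Prefers short answers."
cleaned = MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX.sub("", echoed)
# cleaned == " The user's name is Sam.\n Prefers short answers."
```

`FUNCTION_RETURN_VALUE_TRUNCATED`, despite its constant-style name, is a message formatter that pairs with `FUNCTION_RETURN_CHAR_LIMIT` further down the file. A plausible call site, assuming the caller slices the output before formatting:

```python
def truncate_function_return(return_str: str, limit: int = 6000) -> str:
    # hypothetical caller; 6000 mirrors FUNCTION_RETURN_CHAR_LIMIT
    if len(return_str) <= limit:
        return return_str
    return FUNCTION_RETURN_VALUE_TRUNCATED(return_str[:limit], len(return_str), limit)
```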
letta/data_sources/connectors.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Dict, Iterator, List, Tuple

 import typer

+from letta.constants import EMBEDDING_BATCH_SIZE
 from letta.data_sources.connectors_helper import assert_all_files_exist_locally, extract_metadata_from_files, get_filenames_in_dir
 from letta.embeddings import embedding_model
 from letta.schemas.file import FileMetadata
@@ -40,43 +41,37 @@ class DataConnector:
 async def load_data(
     connector: DataConnector, source: Source, passage_manager: PassageManager, source_manager: SourceManager, actor: "User"
 ):
+    from letta.llm_api.llm_client import LLMClient
+    from letta.schemas.embedding_config import EmbeddingConfig
+
     """Load data from a connector (generates file and passages) into a specified source_id, associated with a user_id."""
     embedding_config = source.embedding_config

-    # embedding model
-    embed_model = embedding_model(embedding_config)
-
     # insert passages/file
-
+    texts = []
     embedding_to_document_name = {}
     passage_count = 0
     file_count = 0
-    for file_metadata in connector.find_files(source):
-        file_count += 1
-        await source_manager.create_file(file_metadata, actor)

-        # generate passages
-        for passage_text, passage_metadata in connector.generate_passages(file_metadata, chunk_size=embedding_config.embedding_chunk_size):
-            # for some reason, llama index parsers sometimes return empty strings
-            if len(passage_text) == 0:
-                typer.secho(
-                    f"Warning: Llama index parser returned empty string, skipping insert of passage with metadata '{passage_metadata}' into VectorDB. You can usually ignore this warning.",
-                    fg=typer.colors.YELLOW,
-                )
-                continue
+    async def generate_embeddings(texts: List[str], embedding_config: EmbeddingConfig) -> List[Passage]:
+        passages = []
+        if embedding_config.embedding_endpoint_type == "openai":
+            texts.append(passage_text)

-            # get embedding
-            try:
-                embedding = embed_model.get_text_embedding(passage_text)
-            except Exception as e:
-                typer.secho(
-                    f"Warning: Failed to get embedding for {passage_text} (error: {str(e)}), skipping insert into VectorDB.",
-                    fg=typer.colors.YELLOW,
-                )
-                continue
+            client = LLMClient.create(
+                provider_type=embedding_config.embedding_endpoint_type,
+                actor=actor,
+            )
+            embeddings = await client.request_embeddings(texts, embedding_config)

+        else:
+            embed_model = embedding_model(embedding_config)
+            embeddings = [embed_model.get_text_embedding(text) for text in texts]
+
+        # collate passage and embedding
+        for text, embedding in zip(texts, embeddings):
             passage = Passage(
-                text=passage_text,
+                text=text,
                 file_id=file_metadata.id,
                 source_id=source.id,
                 metadata=passage_metadata,
@@ -84,7 +79,6 @@ async def load_data(
                 embedding_config=source.embedding_config,
                 embedding=embedding,
             )
-
             hashable_embedding = tuple(passage.embedding)
             file_name = file_metadata.file_name
             if hashable_embedding in embedding_to_document_name:
@@ -96,16 +90,38 @@ async def load_data(

             passages.append(passage)
             embedding_to_document_name[hashable_embedding] = file_name
+        return passages
+
+    for file_metadata in connector.find_files(source):
+        file_count += 1
+        await source_manager.create_file(file_metadata, actor)
+
+        # generate passages
+        for passage_text, passage_metadata in connector.generate_passages(file_metadata, chunk_size=embedding_config.embedding_chunk_size):
+            # for some reason, llama index parsers sometimes return empty strings
+            if len(passage_text) == 0:
+                typer.secho(
+                    f"Warning: Llama index parser returned empty string, skipping insert of passage with metadata '{passage_metadata}' into VectorDB. You can usually ignore this warning.",
+                    fg=typer.colors.YELLOW,
+                )
+                continue
+
+            # get embedding
+            texts.append(passage_text)
+            if len(texts) >= EMBEDDING_BATCH_SIZE:
+                passages = await generate_embeddings(texts, embedding_config)
+                texts = []
+            else:
+                continue

+            # insert passages into passage store
+            await passage_manager.create_many_passages_async(passages, actor)
+            passage_count += len(passages)

+    # final remaining
+    if len(texts) > 0:
+        passages = await generate_embeddings(texts, embedding_config)
+        await passage_manager.create_many_passages_async(passages, actor)
         passage_count += len(passages)

     return passage_count, file_count
@@ -128,7 +144,7 @@ class DirectoryConnector(DataConnector):
         self.recursive = recursive
         self.extensions = extensions

-        if self.recursive == True:
+        if self.recursive:
             assert self.input_directory is not None, "Must provide input directory if recursive is True."

     def find_files(self, source: Source) -> Iterator[FileMetadata]:
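The `load_data` rewrite above replaces per-passage embedding calls with buffered batches: passage texts accumulate until `EMBEDDING_BATCH_SIZE` (200, per the constants.py hunk) is reached, each full buffer is embedded and persisted, and a final flush handles the remainder. Reduced to its skeleton, with generic stand-ins for the Letta types, the control flow is:

```python
from typing import Callable, Iterable, List

EMBEDDING_BATCH_SIZE = 200  # from letta/constants.py in this release


def batched_embed(
    texts: Iterable[str],
    embed_batch: Callable[[List[str]], List[List[float]]],  # stand-in for generate_embeddings
) -> List[List[float]]:
    """Accumulate texts and flush them to the embedding backend in fixed-size batches."""
    buffer: List[str] = []
    out: List[List[float]] = []
    for text in texts:
        buffer.append(text)
        if len(buffer) >= EMBEDDING_BATCH_SIZE:
            out.extend(embed_batch(buffer))
            buffer = []
    if buffer:  # final partial batch, mirroring the `if len(texts) > 0` tail above
        out.extend(embed_batch(buffer))
    return out
```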
letta/errors.py
CHANGED
@@ -88,6 +88,10 @@ class LLMPermissionDeniedError(LLMError):
     """Error when permission is denied by LLM service"""


+class LLMContextWindowExceededError(LLMError):
+    """Error when the context length is exceeded."""
+
+
 class LLMNotFoundError(LLMError):
     """Error when requested resource is not found"""

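`LLMContextWindowExceededError` slots into the existing `LLMError` hierarchy, so callers can react to context overflow specifically (typically by evicting or summarizing in-context messages and retrying) while letting every other `LLMError` subclass propagate unchanged. A sketch of that usage; the client object, its `send_request` method, and the `shrink_context` hook are hypothetical, only the exception class comes from this diff:

```python
from letta.errors import LLMContextWindowExceededError


async def request_with_overflow_recovery(llm_client, request, shrink_context):
    """Retry once after shrinking context if the window is exceeded."""
    try:
        return await llm_client.send_request(request)  # hypothetical call
    except LLMContextWindowExceededError:
        await shrink_context()  # e.g. summarize or evict in-context messages
        return await llm_client.send_request(request)
```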