PyPI - letta-nightly - Versions diffs - 0.6.4.dev20241217104233__py3-none-any.whl → 0.6.5.dev20241218055539__py3-none-any.whl - Mend

letta-nightly 0.6.4.dev20241217104233__py3-none-any.whl → 0.6.5.dev20241218055539__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of letta-nightly might be problematic. Click here for more details.

Files changed (27) hide show

letta/__init__.py +1 -1
letta/agent.py +68 -65
letta/client/client.py +1 -0
letta/constants.py +6 -1
letta/embeddings.py +3 -9
letta/functions/function_sets/base.py +9 -57
letta/functions/schema_generator.py +1 -1
letta/llm_api/anthropic.py +38 -13
letta/llm_api/llm_api_tools.py +12 -1
letta/local_llm/function_parser.py +1 -1
letta/orm/errors.py +8 -0
letta/orm/sqlalchemy_base.py +24 -17
letta/providers.py +2 -0
letta/schemas/agent.py +35 -0
letta/schemas/sandbox_config.py +2 -1
letta/server/rest_api/app.py +32 -7
letta/server/rest_api/routers/v1/tools.py +1 -1
letta/server/server.py +81 -57
letta/services/agent_manager.py +3 -0
letta/services/tool_execution_sandbox.py +54 -45
letta/settings.py +9 -4
letta/utils.py +8 -0
{letta_nightly-0.6.4.dev20241217104233.dist-info → letta_nightly-0.6.5.dev20241218055539.dist-info}/METADATA +1 -1
{letta_nightly-0.6.4.dev20241217104233.dist-info → letta_nightly-0.6.5.dev20241218055539.dist-info}/RECORD +27 -27
{letta_nightly-0.6.4.dev20241217104233.dist-info → letta_nightly-0.6.5.dev20241218055539.dist-info}/LICENSE +0 -0
{letta_nightly-0.6.4.dev20241217104233.dist-info → letta_nightly-0.6.5.dev20241218055539.dist-info}/WHEEL +0 -0
{letta_nightly-0.6.4.dev20241217104233.dist-info → letta_nightly-0.6.5.dev20241218055539.dist-info}/entry_points.txt +0 -0

letta/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.6.4"
+__version__ = "0.6.5"
 # import clients
 from letta.client.client import LocalClient, RESTClient, create_client

letta/agent.py CHANGED Viewed

@@ -18,6 +18,7 @@ from letta.constants import (
     MESSAGE_SUMMARY_WARNING_FRAC,
     O1_BASE_TOOLS,
     REQ_HEARTBEAT_MESSAGE,
+    STRUCTURED_OUTPUT_MODELS,
 )
 from letta.errors import LLMError
 from letta.helpers import ToolRulesSolver
@@ -63,6 +64,7 @@ from letta.system import (
 )
 from letta.utils import (
     count_tokens,
+    get_friendly_error_msg,
     get_local_time,
     get_tool_call_id,
     get_utc_time,
@@ -258,9 +260,6 @@ class Agent(BaseAgent):
         self.user = user
-        # link tools
-        self.link_tools(agent_state.tools)
         # initialize a tool rules solver
         if agent_state.tool_rules:
             # if there are tool rules, print out a warning
@@ -276,6 +275,7 @@ class Agent(BaseAgent):
         # gpt-4, gpt-3.5-turbo, ...
         self.model = self.agent_state.llm_config.model
+        self.check_tool_rules()
         # state managers
         self.block_manager = BlockManager()
@@ -295,8 +295,6 @@ class Agent(BaseAgent):
         self.agent_manager = AgentManager()
         # State needed for heartbeat pausing
-        self.pause_heartbeats_start = None
-        self.pause_heartbeats_minutes = 0
         self.first_message_verify_mono = first_message_verify_mono
@@ -381,6 +379,16 @@ class Agent(BaseAgent):
         # Create the agent in the DB
         self.update_state()
+    def check_tool_rules(self):
+        if self.model not in STRUCTURED_OUTPUT_MODELS:
+            if len(self.tool_rules_solver.init_tool_rules) > 1:
+                raise ValueError(
+                    "Multiple initial tools are not supported for non-structured models. Please use only one initial tool rule."
+                )
+            self.supports_structured_output = False
+        else:
+            self.supports_structured_output = True
     def update_memory_if_change(self, new_memory: Memory) -> bool:
         """
         Update internal memory object and system prompt if there have been modifications.
@@ -415,11 +423,21 @@ class Agent(BaseAgent):
             return True
         return False
-    def execute_tool_and_persist_state(self, function_name, function_to_call, function_args):
+    def execute_tool_and_persist_state(self, function_name: str, function_args: dict, target_letta_tool: Tool):
         """
         Execute tool modifications and persist the state of the agent.
         Note: only some agent state modifications will be persisted, such as data in the AgentState ORM and block data
         """
+        # TODO: Get rid of this. This whole piece is pretty shady, that we exec the function to just get the type hints for args.
+        env = {}
+        env.update(globals())
+        exec(target_letta_tool.source_code, env)
+        callable_func = env[target_letta_tool.json_schema["name"]]
+        spec = inspect.getfullargspec(callable_func).annotations
+        for name, arg in function_args.items():
+            if isinstance(function_args[name], dict):
+                function_args[name] = spec[name](**function_args[name])
         # TODO: add agent manager here
         orig_memory_str = self.agent_state.memory.compile()
@@ -432,11 +450,11 @@ class Agent(BaseAgent):
             if function_name in BASE_TOOLS or function_name in O1_BASE_TOOLS:
                 # base tools are allowed to access the `Agent` object and run on the database
                 function_args["self"] = self  # need to attach self to arg since it's dynamically linked
-                function_response = function_to_call(**function_args)
+                function_response = callable_func(**function_args)
             else:
                 # execute tool in a sandbox
                 # TODO: allow agent_state to specify which sandbox to execute tools in
-                sandbox_run_result = ToolExecutionSandbox(function_name, function_args, self.agent_state.created_by_id).run(
+                sandbox_run_result = ToolExecutionSandbox(function_name, function_args, self.user).run(
                     agent_state=self.agent_state.__deepcopy__()
                 )
                 function_response, updated_agent_state = sandbox_run_result.func_return, sandbox_run_result.agent_state
@@ -446,12 +464,9 @@ class Agent(BaseAgent):
         except Exception as e:
             # Need to catch error here, or else trunction wont happen
             # TODO: modify to function execution error
-            from letta.constants import MAX_ERROR_MESSAGE_CHAR_LIMIT
-            error_msg = f"Error executing tool {function_name}: {e}"
-            if len(error_msg) > MAX_ERROR_MESSAGE_CHAR_LIMIT:
-                error_msg = error_msg[:MAX_ERROR_MESSAGE_CHAR_LIMIT]
-            raise ValueError(error_msg)
+            function_response = get_friendly_error_msg(
+                function_name=function_name, exception_name=type(e).__name__, exception_message=str(e)
+            )
         return function_response
@@ -464,27 +479,6 @@ class Agent(BaseAgent):
     def messages(self, value):
         raise Exception("Modifying message list directly not allowed")
-    def link_tools(self, tools: List[Tool]):
-        """Bind a tool object (schema + python function) to the agent object"""
-        # Store the functions schemas (this is passed as an argument to ChatCompletion)
-        self.functions = []
-        self.functions_python = {}
-        env = {}
-        env.update(globals())
-        for tool in tools:
-            try:
-                # WARNING: name may not be consistent?
-                # if tool.module:  # execute the whole module
-                #    exec(tool.module, env)
-                # else:
-                exec(tool.source_code, env)
-                self.functions_python[tool.json_schema["name"]] = env[tool.json_schema["name"]]
-                self.functions.append(tool.json_schema)
-            except Exception:
-                warnings.warn(f"WARNING: tool {tool.name} failed to link")
-        assert all([callable(f) for k, f in self.functions_python.items()]), self.functions_python
     def _load_messages_from_recall(self, message_ids: List[str]) -> List[Message]:
         """Load a list of messages from recall storage"""
@@ -588,14 +582,32 @@ class Agent(BaseAgent):
         empty_response_retry_limit: int = 3,
         backoff_factor: float = 0.5,  # delay multiplier for exponential backoff
         max_delay: float = 10.0,  # max delay between retries
+        step_count: Optional[int] = None,
     ) -> ChatCompletionResponse:
         """Get response from LLM API with robust retry mechanism."""
         allowed_tool_names = self.tool_rules_solver.get_allowed_tool_names()
+        agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
         allowed_functions = (
-            self.functions if not allowed_tool_names else [func for func in self.functions if func["name"] in allowed_tool_names]
+            agent_state_tool_jsons
+            if not allowed_tool_names
+            else [func for func in agent_state_tool_jsons if func["name"] in allowed_tool_names]
         )
+        # For the first message, force the initial tool if one is specified
+        force_tool_call = None
+        if (
+            step_count is not None
+            and step_count == 0
+            and not self.supports_structured_output
+            and len(self.tool_rules_solver.init_tool_rules) > 0
+        ):
+            force_tool_call = self.tool_rules_solver.init_tool_rules[0].tool_name
+        # Force a tool call if exactly one tool is specified
+        elif step_count is not None and step_count > 0 and len(allowed_tool_names) == 1:
+            force_tool_call = allowed_tool_names[0]
         for attempt in range(1, empty_response_retry_limit + 1):
             try:
                 response = create(
@@ -603,9 +615,10 @@ class Agent(BaseAgent):
                     messages=message_sequence,
                     user_id=self.agent_state.created_by_id,
                     functions=allowed_functions,
-                    functions_python=self.functions_python,
+                    # functions_python=self.functions_python, do we need this?
                     function_call=function_call,
                     first_message=first_message,
+                    force_tool_call=force_tool_call,
                     stream=stream,
                     stream_interface=self.interface,
                 )
@@ -711,10 +724,13 @@ class Agent(BaseAgent):
             function_name = function_call.name
             printd(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
-            # Failure case 1: function name is wrong
-            try:
-                function_to_call = self.functions_python[function_name]
-            except KeyError:
+            # Failure case 1: function name is wrong (not in agent_state.tools)
+            target_letta_tool = None
+            for t in self.agent_state.tools:
+                if t.name == function_name:
+                    target_letta_tool = t
+            if not target_letta_tool:
                 error_msg = f"No function named {function_name}"
                 function_response = package_function_response(False, error_msg)
                 messages.append(
@@ -782,14 +798,8 @@ class Agent(BaseAgent):
             #       this is because the function/tool role message is only created once the function/tool has executed/returned
             self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1])
             try:
-                spec = inspect.getfullargspec(function_to_call).annotations
-                for name, arg in function_args.items():
-                    if isinstance(function_args[name], dict):
-                        function_args[name] = spec[name](**function_args[name])
                 # handle tool execution (sandbox) and state updates
-                function_response = self.execute_tool_and_persist_state(function_name, function_to_call, function_args)
+                function_response = self.execute_tool_and_persist_state(function_name, function_args, target_letta_tool)
                 # handle trunction
                 if function_name in ["conversation_search", "conversation_search_date", "archival_memory_search"]:
@@ -801,8 +811,7 @@ class Agent(BaseAgent):
                     truncate = True
                 # get the function response limit
-                tool_obj = [tool for tool in self.agent_state.tools if tool.name == function_name][0]
-                return_char_limit = tool_obj.return_char_limit
+                return_char_limit = target_letta_tool.return_char_limit
                 function_response_string = validate_function_response(
                     function_response, return_char_limit=return_char_limit, truncate=truncate
                 )
@@ -897,6 +906,7 @@ class Agent(BaseAgent):
         step_count = 0
         while True:
             kwargs["first_message"] = False
+            kwargs["step_count"] = step_count
             step_response = self.inner_step(
                 messages=next_input_message,
                 **kwargs,
@@ -972,6 +982,7 @@ class Agent(BaseAgent):
         first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS,
         skip_verify: bool = False,
         stream: bool = False,  # TODO move to config?
+        step_count: Optional[int] = None,
     ) -> AgentStepResponse:
         """Runs a single step in the agent loop (generates at most one LLM call)"""
@@ -1014,7 +1025,9 @@ class Agent(BaseAgent):
             else:
                 response = self._get_ai_reply(
                     message_sequence=input_message_sequence,
+                    first_message=first_message,
                     stream=stream,
+                    step_count=step_count,
                 )
             # Step 3: check if LLM wanted to call a function
@@ -1235,17 +1248,6 @@ class Agent(BaseAgent):
         printd(f"Ran summarizer, messages length {prior_len} -> {len(self.messages)}")
-    def heartbeat_is_paused(self):
-        """Check if there's a requested pause on timed heartbeats"""
-        # Check if the pause has been initiated
-        if self.pause_heartbeats_start is None:
-            return False
-        # Check if it's been more than pause_heartbeats_minutes since pause_heartbeats_start
-        elapsed_time = get_utc_time() - self.pause_heartbeats_start
-        return elapsed_time.total_seconds() < self.pause_heartbeats_minutes * 60
     def _swap_system_message_in_buffer(self, new_system_message: str):
         """Update the system message (NOT prompt) of the Agent (requires updating the internal buffer)"""
         assert isinstance(new_system_message, str)
@@ -1370,7 +1372,7 @@ class Agent(BaseAgent):
         agent_manager: AgentManager,
     ):
         """Attach a source to the agent using the SourcesAgents ORM relationship.
         Args:
             user: User performing the action
             source_id: ID of the source to attach
@@ -1553,9 +1555,10 @@ class Agent(BaseAgent):
         num_tokens_external_memory_summary = count_tokens(external_memory_summary)
         # tokens taken up by function definitions
-        if self.functions:
-            available_functions_definitions = [ChatCompletionRequestTool(type="function", function=f) for f in self.functions]
-            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=self.functions, model=self.model)
+        agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
+        if agent_state_tool_jsons:
+            available_functions_definitions = [ChatCompletionRequestTool(type="function", function=f) for f in agent_state_tool_jsons]
+            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=agent_state_tool_jsons, model=self.model)
         else:
             available_functions_definitions = []
             num_tokens_available_functions_definitions = 0

letta/client/client.py CHANGED Viewed

@@ -2156,6 +2156,7 @@ class LocalClient(AbstractClient):
             "block_ids": [b.id for b in memory.get_blocks()] + block_ids,
             "tool_ids": tool_ids,
             "tool_rules": tool_rules,
+            "include_base_tools": include_base_tools,
             "system": system,
             "agent_type": agent_type,
             "llm_config": llm_config if llm_config else self._default_llm_config,

letta/constants.py CHANGED Viewed

@@ -23,6 +23,7 @@ MIN_CONTEXT_WINDOW = 4096
 # embeddings
 MAX_EMBEDDING_DIM = 4096  # maximum supported embeding size - do NOT change or else DBs will need to be reset
+DEFAULT_EMBEDDING_CHUNK_SIZE = 300
 # tokenizers
 EMBEDDING_TO_TOKENIZER_MAP = {
@@ -37,7 +38,8 @@ DEFAULT_HUMAN = "basic"
 DEFAULT_PRESET = "memgpt_chat"
 # Base tools that cannot be edited, as they access agent state directly
-BASE_TOOLS = ["send_message", "conversation_search", "conversation_search_date", "archival_memory_insert", "archival_memory_search"]
+# Note that we don't include "conversation_search_date" for now
+BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "archival_memory_search"]
 O1_BASE_TOOLS = ["send_thinking_message", "send_final_message"]
 # Base memory tools CAN be edited, and are added by default by the server
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
@@ -48,6 +50,9 @@ BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
 DEFAULT_MESSAGE_TOOL = "send_message"
 DEFAULT_MESSAGE_TOOL_KWARG = "message"
+# Structured output models
+STRUCTURED_OUTPUT_MODELS = {"gpt-4o", "gpt-4o-mini"}
 # LOGGER_LOG_LEVEL is use to convert Text to Logging level value for logging mostly for Cli input to setting level
 LOGGER_LOG_LEVELS = {"CRITICAL": CRITICAL, "ERROR": ERROR, "WARN": WARN, "WARNING": WARNING, "INFO": INFO, "DEBUG": DEBUG, "NOTSET": NOTSET}

letta/embeddings.py CHANGED Viewed

@@ -234,16 +234,10 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
         )
     elif endpoint_type == "ollama":
-        from llama_index.embeddings.ollama import OllamaEmbedding
-        ollama_additional_kwargs = {}
-        callback_manager = None
-        model = OllamaEmbedding(
-            model_name=config.embedding_model,
+        model = OllamaEmbeddings(
+            model=config.embedding_model,
             base_url=config.embedding_endpoint,
-            ollama_additional_kwargs=ollama_additional_kwargs or {},
-            callback_manager=callback_manager or None,
+            ollama_additional_kwargs={},
         )
         return model

letta/functions/function_sets/base.py CHANGED Viewed

@@ -1,16 +1,6 @@
-from datetime import datetime
 from typing import Optional
 from letta.agent import Agent
-from letta.constants import MAX_PAUSE_HEARTBEATS
-from letta.services.agent_manager import AgentManager
-# import math
-# from letta.utils import json_dumps
-### Functions / tools the agent can use
-# All functions should return a response string (or None)
-# If the function fails, throw an exception
 def send_message(self: "Agent", message: str) -> Optional[str]:
@@ -28,36 +18,6 @@ def send_message(self: "Agent", message: str) -> Optional[str]:
     return None
-# Construct the docstring dynamically (since it should use the external constants)
-pause_heartbeats_docstring = f"""
-Temporarily ignore timed heartbeats. You may still receive messages from manual heartbeats and other events.
-Args:
-    minutes (int): Number of minutes to ignore heartbeats for. Max value of {MAX_PAUSE_HEARTBEATS} minutes ({MAX_PAUSE_HEARTBEATS // 60} hours).
-Returns:
-    str: Function status response
-"""
-def pause_heartbeats(self: "Agent", minutes: int) -> Optional[str]:
-    import datetime
-    from letta.constants import MAX_PAUSE_HEARTBEATS
-    minutes = min(MAX_PAUSE_HEARTBEATS, minutes)
-    # Record the current time
-    self.pause_heartbeats_start = datetime.datetime.now(datetime.timezone.utc)
-    # And record how long the pause should go for
-    self.pause_heartbeats_minutes = int(minutes)
-    return f"Pausing timed heartbeats for {minutes} min"
-pause_heartbeats.__doc__ = pause_heartbeats_docstring
 def conversation_search(self: "Agent", query: str, page: Optional[int] = 0) -> Optional[str]:
     """
     Search prior conversation history using case-insensitive string matching.
@@ -84,19 +44,19 @@ def conversation_search(self: "Agent", query: str, page: Optional[int] = 0) -> O
     count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
     # TODO: add paging by page number. currently cursor only works with strings.
     # original: start=page * count
-    results = self.message_manager.list_user_messages_for_agent(
+    messages = self.message_manager.list_user_messages_for_agent(
         agent_id=self.agent_state.id,
         actor=self.user,
         query_text=query,
         limit=count,
     )
-    total = len(results)
+    total = len(messages)
     num_pages = math.ceil(total / count) - 1  # 0 index
-    if len(results) == 0:
+    if len(messages) == 0:
         results_str = f"No results found."
     else:
-        results_pref = f"Showing {len(results)} of {total} results (page {page}/{num_pages}):"
-        results_formatted = [f"timestamp: {d['timestamp']}, {d['message']['role']} - {d['message']['content']}" for d in results]
+        results_pref = f"Showing {len(messages)} of {total} results (page {page}/{num_pages}):"
+        results_formatted = [message.text for message in messages]
         results_str = f"{results_pref} {json_dumps(results_formatted)}"
     return results_str
@@ -114,6 +74,7 @@ def conversation_search_date(self: "Agent", start_date: str, end_date: str, page
         str: Query result string
     """
     import math
+    from datetime import datetime
     from letta.constants import RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
     from letta.utils import json_dumps
@@ -142,7 +103,6 @@ def conversation_search_date(self: "Agent", start_date: str, end_date: str, page
         start_date=start_datetime,
         end_date=end_datetime,
         limit=count,
-        # start_date=start_date, end_date=end_date, limit=count, start=page * count
     )
     total = len(results)
     num_pages = math.ceil(total / count) - 1  # 0 index
@@ -186,10 +146,8 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s
     Returns:
         str: Query result string
     """
-    import math
     from letta.constants import RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
-    from letta.utils import json_dumps
     if page is None or (isinstance(page, str) and page.lower().strip() == "none"):
         page = 0
@@ -198,7 +156,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s
     except:
         raise ValueError(f"'page' argument must be an integer")
     count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
     try:
         # Get results using passage manager
         all_results = self.agent_manager.list_passages(
@@ -207,7 +165,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s
             query_text=query,
             limit=count + start,  # Request enough results to handle offset
             embedding_config=self.agent_state.embedding_config,
-            embed_query=True
+            embed_query=True,
         )
         # Apply pagination
@@ -215,13 +173,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s
         paged_results = all_results[start:end]
         # Format results to match previous implementation
-        formatted_results = [
-            {
-                "timestamp": str(result.created_at),
-                "content": result.text
-            }
-            for result in paged_results
-        ]
+        formatted_results = [{"timestamp": str(result.created_at), "content": result.text} for result in paged_results]
         return formatted_results, len(formatted_results)

letta/functions/schema_generator.py CHANGED Viewed

@@ -386,7 +386,7 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[
     # append the heartbeat
     # TODO: don't hard-code
     # TODO: if terminal, don't include this
-    if function.__name__ not in ["send_message", "pause_heartbeats"]:
+    if function.__name__ not in ["send_message"]:
         schema["parameters"]["properties"]["request_heartbeat"] = {
             "type": "boolean",
             "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function.",

letta/llm_api/anthropic.py CHANGED Viewed

@@ -99,16 +99,20 @@ def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
         - 1 level less of nesting
         - "parameters" -> "input_schema"
     """
-    tools_dict_list = []
+    formatted_tools = []
     for tool in tools:
-        tools_dict_list.append(
-            {
-                "name": tool.function.name,
-                "description": tool.function.description,
-                "input_schema": tool.function.parameters,
+        formatted_tool = {
+            "name"         : tool.function.name,
+            "description"  : tool.function.description,
+            "input_schema"   : tool.function.parameters or {
+                "type": "object",
+                "properties": {},
+                "required": []
             }
-        )
-    return tools_dict_list
+        }
+        formatted_tools.append(formatted_tool)
+    return formatted_tools
 def merge_tool_results_into_user_messages(messages: List[dict]):
@@ -258,10 +262,24 @@ def convert_anthropic_response_to_chatcompletion(
                     ),
                 )
             ]
-        else:
-            # Just inner mono
-            content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
-            tool_calls = None
+        elif len(response_json["content"]) == 1:
+            if response_json["content"][0]["type"] == "tool_use":
+                # function call only
+                content = None
+                tool_calls = [
+                    ToolCall(
+                        id=response_json["content"][0]["id"],
+                        type="function",
+                        function=FunctionCall(
+                            name=response_json["content"][0]["name"],
+                            arguments=json.dumps(response_json["content"][0]["input"], indent=2),
+                        ),
+                    )
+                ]
+            else:
+                # inner mono only
+                content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
+                tool_calls = None
     else:
         raise RuntimeError("Unexpected type for content in response_json.")
@@ -323,6 +341,14 @@ def anthropic_chat_completions_request(
     if anthropic_tools is not None:
         data["tools"] = anthropic_tools
+        # TODO: Add support for other tool_choice options like "auto", "any"
+        if len(anthropic_tools) == 1:
+            data["tool_choice"] = {
+                "type": "tool",  # Changed from "function" to "tool"
+                "name": anthropic_tools[0]["name"],  # Directly specify name without nested "function" object
+                "disable_parallel_tool_use": True  # Force single tool use
+            }
     # Move 'system' to the top level
     # 'messages: Unexpected role "system". The Messages API accepts a top-level `system` parameter, not "system" as an input message role.'
     assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
@@ -358,7 +384,6 @@ def anthropic_chat_completions_request(
     data.pop("top_p", None)
     data.pop("presence_penalty", None)
     data.pop("user", None)
-    data.pop("tool_choice", None)
     response_json = make_post_request(url, headers, data)
     return convert_anthropic_response_to_chatcompletion(response_json=response_json, inner_thoughts_xml_tag=inner_thoughts_xml_tag)

letta/llm_api/llm_api_tools.py CHANGED Viewed

@@ -113,6 +113,7 @@ def create(
     function_call: str = "auto",
     # hint
     first_message: bool = False,
+    force_tool_call: Optional[str] = None,  # Force a specific tool to be called
     # use tool naming?
     # if false, will use deprecated 'functions' style
     use_tool_naming: bool = True,
@@ -252,6 +253,16 @@ def create(
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
+        tool_call = None
+        if force_tool_call is not None:
+            tool_call = {
+                "type": "function",
+                "function": {
+                    "name": force_tool_call
+                }
+            }
+            assert functions is not None
         return anthropic_chat_completions_request(
             url=llm_config.model_endpoint,
             api_key=model_settings.anthropic_api_key,
@@ -259,7 +270,7 @@ def create(
                 model=llm_config.model,
                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
                 tools=[{"type": "function", "function": f} for f in functions] if functions else None,
-                # tool_choice=function_call,
+                tool_choice=tool_call,
                 # user=str(user_id),
                 # NOTE: max_tokens is required for Anthropic API
                 max_tokens=1024,  # TODO make dynamic

letta/local_llm/function_parser.py CHANGED Viewed

@@ -3,7 +3,7 @@ import json
 from letta.utils import json_dumps, json_loads
-NO_HEARTBEAT_FUNCS = ["send_message", "pause_heartbeats"]
+NO_HEARTBEAT_FUNCS = ["send_message"]
 def insert_heartbeat(message):

letta/orm/errors.py CHANGED Viewed

@@ -12,3 +12,11 @@ class UniqueConstraintViolationError(ValueError):
 class ForeignKeyConstraintViolationError(ValueError):
     """Custom exception for foreign key constraint violations."""
+class DatabaseTimeoutError(Exception):
+    """Custom exception for database timeout issues."""
+    def __init__(self, message="Database operation timed out", original_exception=None):
+        super().__init__(message)
+        self.original_exception = original_exception

letta-nightly 0.6.4.dev20241217104233__py3-none-any.whl → 0.6.5.dev20241218055539__py3-none-any.whl

Potentially problematic release.

letta-nightly 0.6.4.dev20241217104233__py3-none-any.whl → 0.6.5.dev20241218055539__py3-none-any.whl