letta-nightly 0.5.0.dev20241017104103__py3-none-any.whl → 0.5.0.dev20241019104023__py3-none-any.whl
This diff reflects the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- letta/agent.py +29 -14
- letta/cli/cli.py +0 -2
- letta/client/client.py +41 -6
- letta/constants.py +1 -1
- letta/functions/helpers.py +3 -3
- letta/llm_api/anthropic.py +1 -1
- letta/llm_api/helpers.py +0 -15
- letta/llm_api/llm_api_tools.py +35 -47
- letta/llm_api/openai.py +18 -8
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +1 -1
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +1 -1
- letta/local_llm/utils.py +22 -6
- letta/main.py +0 -4
- letta/metadata.py +19 -6
- letta/o1_agent.py +87 -0
- letta/personas/examples/o1_persona.txt +5 -0
- letta/prompts/system/memgpt_modified_o1.txt +31 -0
- letta/schemas/agent.py +30 -2
- letta/schemas/llm_config.py +24 -1
- letta/schemas/memory.py +4 -0
- letta/schemas/openai/chat_completion_request.py +2 -2
- letta/schemas/tool.py +34 -2
- letta/server/rest_api/app.py +1 -0
- letta/server/rest_api/routers/v1/agents.py +14 -6
- letta/server/rest_api/routers/v1/tools.py +9 -6
- letta/server/server.py +63 -22
- letta/settings.py +3 -0
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/METADATA +2 -2
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/RECORD +32 -29
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/LICENSE +0 -0
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/WHEEL +0 -0
- {letta_nightly-0.5.0.dev20241017104103.dist-info → letta_nightly-0.5.0.dev20241019104023.dist-info}/entry_points.txt +0 -0
letta/agent.py
CHANGED
@@ -23,16 +23,19 @@ from letta.errors import LLMError
 from letta.interface import AgentInterface
 from letta.llm_api.helpers import is_context_overflow_error
 from letta.llm_api.llm_api_tools import create
-from letta.local_llm.utils import num_tokens_from_messages
+from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.memory import ArchivalMemory, RecallMemory, summarize_messages
 from letta.metadata import MetadataStore
 from letta.persistence_manager import LocalStateManager
 from letta.schemas.agent import AgentState, AgentStepResponse
 from letta.schemas.block import Block
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.enums import MessageRole, OptionState
+from letta.schemas.enums import MessageRole
 from letta.schemas.memory import ContextWindowOverview, Memory
 from letta.schemas.message import Message, UpdateMessage
+from letta.schemas.openai.chat_completion_request import (
+    Tool as ChatCompletionRequestTool,
+)
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.openai.chat_completion_response import (
     Message as ChatCompletionMessage,
@@ -463,15 +466,14 @@ class Agent(BaseAgent):
         function_call: str = "auto",
         first_message: bool = False,  # hint
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     ) -> ChatCompletionResponse:
         """Get response from LLM API"""
         try:
             response = create(
                 # agent_state=self.agent_state,
                 llm_config=self.agent_state.llm_config,
-                user_id=self.agent_state.user_id,
                 messages=message_sequence,
+                user_id=self.agent_state.user_id,
                 functions=self.functions,
                 functions_python=self.functions_python,
                 function_call=function_call,
@@ -480,8 +482,6 @@ class Agent(BaseAgent):
                 # streaming
                 stream=stream,
                 stream_interface=self.interface,
-                # putting inner thoughts in func args or not
-                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )

             if len(response.choices) == 0 or response.choices[0] is None:
@@ -822,7 +822,6 @@ class Agent(BaseAgent):
         first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS,
         skip_verify: bool = False,
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """Runs a single step in the agent loop (generates at most one LLM call)"""
@@ -861,10 +860,7 @@ class Agent(BaseAgent):
                 counter = 0
                 while True:
                     response = self._get_ai_reply(
-                        message_sequence=input_message_sequence,
-                        first_message=True,  # passed through to the prompt formatter
-                        stream=stream,
-                        inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
+                        message_sequence=input_message_sequence, first_message=True, stream=stream  # passed through to the prompt formatter
                     )
                     if verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono):
                         break
@@ -877,7 +873,6 @@ class Agent(BaseAgent):
                 response = self._get_ai_reply(
                     message_sequence=input_message_sequence,
                     stream=stream,
-                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 )

             # Step 3: check if LLM wanted to call a function
@@ -954,7 +949,6 @@ class Agent(BaseAgent):
                 first_message_retry_limit=first_message_retry_limit,
                 skip_verify=skip_verify,
                 stream=stream,
-                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 ms=ms,
             )

@@ -1467,6 +1461,24 @@ class Agent(BaseAgent):
         )
         num_tokens_external_memory_summary = count_tokens(external_memory_summary)

+        # tokens taken up by function definitions
+        if self.functions:
+            available_functions_definitions = [ChatCompletionRequestTool(type="function", function=f) for f in self.functions]
+            num_tokens_available_functions_definitions = num_tokens_from_functions(functions=self.functions, model=self.model)
+        else:
+            available_functions_definitions = []
+            num_tokens_available_functions_definitions = 0
+
+        num_tokens_used_total = (
+            num_tokens_system  # system prompt
+            + num_tokens_available_functions_definitions  # function definitions
+            + num_tokens_core_memory  # core memory
+            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
+            + num_tokens_summary_memory  # summary of ongoing conversation
+            + num_tokens_messages  # tokens taken by messages
+        )
+        assert isinstance(num_tokens_used_total, int)
+
         return ContextWindowOverview(
             # context window breakdown (in messages)
             num_messages=len(self._messages),
@@ -1475,7 +1487,7 @@ class Agent(BaseAgent):
             num_tokens_external_memory_summary=num_tokens_external_memory_summary,
             # top-level information
             context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=
+            context_window_size_current=num_tokens_used_total,
             # context window breakdown (in tokens)
             num_tokens_system=num_tokens_system,
             system_prompt=system_prompt,
@@ -1485,6 +1497,9 @@ class Agent(BaseAgent):
             summary_memory=summary_memory,
             num_tokens_messages=num_tokens_messages,
             messages=self._messages,
+            # related to functions
+            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
+            functions_definitions=available_functions_definitions,
         )
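The new block above treats tool schemas as first-class context-window content: every serialized function definition consumes prompt budget just like a message. A rough, runnable sketch of that cost, as a simplified stand-in for letta's num_tokens_from_functions (the example schema is invented):

import json

import tiktoken

def estimate_function_def_tokens(functions: list, model: str = "gpt-4") -> int:
    # Simplified: encode the serialized JSON schema directly. The real
    # num_tokens_from_functions walks the schema field by field, so exact
    # counts will differ slightly.
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return sum(len(encoding.encode(json.dumps(f))) for f in functions)

example_schema = {  # invented example, not a letta built-in
    "name": "get_weather",
    "description": "Look up current weather for a city",
    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
}
print(estimate_function_def_tokens([example_schema]))  # small positive int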
letta/cli/cli.py
CHANGED
@@ -49,7 +49,6 @@ def server(
     ade: Annotated[bool, typer.Option(help="Allows remote access")] = False,
 ):
     """Launch a Letta server process"""
-
     if type == ServerChoice.rest_api:
         pass

@@ -321,7 +320,6 @@ def run(
         ms=ms,
         no_verify=no_verify,
         stream=stream,
-        inner_thoughts_in_kwargs=no_content,
     )  # TODO: add back no_verify
letta/client/client.py
CHANGED
@@ -96,6 +96,9 @@ class AbstractClient(object):
     ):
         raise NotImplementedError

+    def get_tools_from_agent(self, agent_id: str):
+        raise NotImplementedError
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         raise NotImplementedError

@@ -197,7 +200,7 @@ class AbstractClient(object):
     ) -> Tool:
         raise NotImplementedError

-    def list_tools(self) -> List[Tool]:
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         raise NotImplementedError

     def get_tool(self, id: str) -> Tool:
@@ -480,6 +483,21 @@ class RESTClient(AbstractClient):
             raise ValueError(f"Failed to update agent: {response.text}")
         return AgentState(**response.json())

+    def get_tools_from_agent(self, agent_id: str) -> List[Tool]:
+        """
+        Get tools to an existing agent
+
+        Args:
+            agent_id (str): ID of the agent
+
+        Returns:
+            List[Tool]: A List of Tool objs
+        """
+        response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/tools", headers=self.headers)
+        if response.status_code != 200:
+            raise ValueError(f"Failed to get tools from agents: {response.text}")
+        return [Tool(**tool) for tool in response.json()]
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         """
         Add tool to an existing agent
@@ -1364,14 +1382,19 @@ class RESTClient(AbstractClient):
         # raise ValueError(f"Failed to create tool: {response.text}")
         # return ToolModel(**response.json())

-    def list_tools(self) -> List[Tool]:
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         """
         List available tools for the user.

         Returns:
             tools (List[Tool]): List of tools
         """
-
+        params = {}
+        if cursor:
+            params["cursor"] = str(cursor)
+        if limit:
+            params["limit"] = limit
+        response = requests.get(f"{self.base_url}/{self.api_prefix}/tools", params=params, headers=self.headers)
         if response.status_code != 200:
             raise ValueError(f"Failed to list tools: {response.text}")
         return [Tool(**tool) for tool in response.json()]

@@ -1692,6 +1715,19 @@ class LocalClient(AbstractClient):
         )
         return agent_state

+    def get_tools_from_agent(self, agent_id: str) -> List[Tool]:
+        """
+        Get tools from an existing agent.
+
+        Args:
+            agent_id (str): ID of the agent
+
+        Returns:
+            List[Tool]: A list of Tool objs
+        """
+        self.interface.clear()
+        return self.server.get_tools_from_agent(agent_id=agent_id, user_id=self.user_id)
+
     def add_tool_to_agent(self, agent_id: str, tool_id: str):
         """
         Add tool to an existing agent
@@ -2250,15 +2286,14 @@ class LocalClient(AbstractClient):
             ToolUpdate(id=id, source_type=source_type, source_code=source_code, tags=tags, name=name), self.user_id
         )

-    def list_tools(self):
+    def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[Tool]:
         """
         List available tools for the user.

         Returns:
             tools (List[Tool]): List of tools
         """
-
-        return tools
+        return self.server.list_tools(cursor=cursor, limit=limit, user_id=self.user_id)

     def get_tool(self, id: str) -> Optional[Tool]:
         """
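Taken together, the client changes add a read path for an agent's tools and route LocalClient.list_tools through the server (the old body returned an undefined tools variable). A usage sketch against a running server, assuming letta's create_client factory:

from letta import create_client  # assumes the letta client factory

client = create_client()

# Page through tools with the new cursor/limit parameters.
first_page = client.list_tools(limit=10)
print([t.name for t in first_page])

# List the tools attached to a specific agent.
agent = client.create_agent(name="demo-agent")  # illustrative agent
for tool in client.get_tools_from_agent(agent_id=agent.id):
    print(tool.name)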
letta/constants.py
CHANGED
@@ -139,7 +139,7 @@ CORE_MEMORY_PERSONA_CHAR_LIMIT = 2000
 CORE_MEMORY_HUMAN_CHAR_LIMIT = 2000

 # Function return limits
-FUNCTION_RETURN_CHAR_LIMIT =
+FUNCTION_RETURN_CHAR_LIMIT = 6000  # ~300 words

 MAX_PAUSE_HEARTBEATS = 360  # in min
letta/functions/helpers.py
CHANGED
@@ -5,10 +5,10 @@ from pydantic import BaseModel

 def generate_composio_tool_wrapper(action: "ActionType") -> tuple[str, str]:
     # Instantiate the object
-    tool_instantiation_str = f"composio_toolset.get_tools(actions=[Action.{action
+    tool_instantiation_str = f"composio_toolset.get_tools(actions=[Action.{str(action)}])[0]"

     # Generate func name
-    func_name = f"run_{action.name}"
+    func_name = f"run_{action.name.lower()}"

     wrapper_function_str = f"""
 def {func_name}(**kwargs):
@@ -19,7 +19,7 @@ def {func_name}(**kwargs):

     composio_toolset = ComposioToolSet()
     tool = {tool_instantiation_str}
-    tool.func(**kwargs)
+    return tool.func(**kwargs)['data']
 """

     # Compile safety check
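Three fixes land in the Composio wrapper generator: the instantiation string now interpolates str(action) and indexes the first returned tool, the generated function name is lowercased, and the wrapper returns tool.func(**kwargs)['data'] instead of discarding the result. A runnable sketch of the naming change, with a stand-in for Composio's ActionType enum (the action name is invented):

from enum import Enum

class Action(Enum):  # stand-in for composio's Action enum
    GITHUB_STAR_REPO = "github_star_repo"  # invented action name

action = Action.GITHUB_STAR_REPO
func_name = f"run_{action.name.lower()}"    # new: lowercased
assert func_name == "run_github_star_repo"  # previously "run_GITHUB_STAR_REPO"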
letta/llm_api/anthropic.py
CHANGED
@@ -53,7 +53,7 @@ def anthropic_get_model_list(url: str, api_key: Union[str, None]) -> dict:
     return MODEL_LIST


-def convert_tools_to_anthropic_format(tools: List[Tool]
+def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use

     OpenAI style:
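The conversion helper gains an explicit List[dict] return annotation. For orientation, the transformation it names reshapes OpenAI-style tool schemas into Anthropic's flat tool format; a simplified sketch, not the function's actual body:

def to_anthropic_format_sketch(openai_tools: list) -> list:
    # OpenAI nests the schema under "function"; Anthropic's tool-use API
    # expects a flat dict with "input_schema" in place of "parameters".
    return [
        {
            "name": t["function"]["name"],
            "description": t["function"]["description"],
            "input_schema": t["function"].get("parameters", {"type": "object", "properties": {}}),
        }
        for t in openai_tools
    ]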
letta/llm_api/helpers.py
CHANGED
@@ -6,7 +6,6 @@ from typing import Any, List, Union
 import requests

 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-from letta.schemas.enums import OptionState
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
 from letta.utils import json_dumps, printd

@@ -200,17 +199,3 @@ def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool:
     # Generic fail
     else:
         return False
-
-
-def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str):
-    if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
-        # model that are known to not use `content` fields on tool calls
-        inner_thoughts_in_kwargs = "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
-    else:
-        inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs_option == OptionState.YES else False
-
-    if not isinstance(inner_thoughts_in_kwargs, bool):
-        warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
-        inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
-
-    return inner_thoughts_in_kwargs
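The deleted helper was the last piece of the per-call OptionState plumbing: call sites now read a put_inner_thoughts_in_kwargs flag straight off LLMConfig (see the llm_api_tools.py hunks below; llm_config.py also changes in this release, +24 -1). A hedged sketch of config-level defaulting that reproduces the deleted heuristic (the field name comes from the diff; the validator is illustrative, not lifted from llm_config.py):

from typing import Optional

from pydantic import BaseModel, model_validator

class LLMConfigSketch(BaseModel):
    model: str
    put_inner_thoughts_in_kwargs: Optional[bool] = None

    @model_validator(mode="after")
    def _default_inner_thoughts(self):
        if self.put_inner_thoughts_in_kwargs is None:
            # mirrors the deleted heuristic: models known not to emit
            # `content` alongside tool calls get inner thoughts as a kwarg
            self.put_inner_thoughts_in_kwargs = any(
                name in self.model for name in ("gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo")
            )
        return self

assert LLMConfigSketch(model="gpt-4o-mini").put_inner_thoughts_in_kwargs is True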
letta/llm_api/llm_api_tools.py
CHANGED
@@ -1,4 +1,3 @@
-import os
 import random
 import time
 from typing import List, Optional, Union
@@ -8,14 +7,12 @@ import requests
 from letta.constants import CLI_WARNING_PREFIX
 from letta.llm_api.anthropic import anthropic_chat_completions_request
 from letta.llm_api.azure_openai import azure_openai_chat_completions_request
-from letta.llm_api.cohere import cohere_chat_completions_request
 from letta.llm_api.google_ai import (
     convert_tools_to_google_ai_format,
     google_ai_chat_completions_request,
 )
 from letta.llm_api.helpers import (
     add_inner_thoughts_to_functions,
-    derive_inner_thoughts_in_kwargs,
     unpack_all_inner_thoughts_from_kwargs,
 )
 from letta.llm_api.openai import (
@@ -28,7 +25,6 @@ from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
 )
-from letta.schemas.enums import OptionState
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import (
@@ -120,9 +116,6 @@ def create(
     # streaming?
     stream: bool = False,
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
-    # TODO move to llm_config?
-    # if unspecified (None), default to something we've tested
-    inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     max_tokens: Optional[int] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
@@ -146,10 +139,7 @@ def create(
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")

-
-        data = build_openai_chat_completions_request(
-            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
-        )
+        data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens)

         if stream:  # Client requested token streaming
             data.stream = True
@@ -176,7 +166,7 @@ def create(
         if isinstance(stream_interface, AgentChunkStreamingInterface):
             stream_interface.stream_end()

-        if
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
@@ -198,9 +188,8 @@ def create(
         # Set the llm config model_endpoint from model_settings
         # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
         llm_config.model_endpoint = model_settings.azure_base_url
-        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, llm_config.model)
         chat_completion_request = build_openai_chat_completions_request(
-            llm_config, messages, user_id, functions, function_call, use_tool_naming,
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens
         )

         response = azure_openai_chat_completions_request(
@@ -210,7 +199,7 @@ def create(
             chat_completion_request=chat_completion_request,
         )

-        if
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
@@ -224,7 +213,7 @@ def create(
         if functions is not None:
             tools = [{"type": "function", "function": f} for f in functions]
             tools = [Tool(**t) for t in tools]
-            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=
+            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
         else:
             tools = None

@@ -237,7 +226,7 @@ def create(
                 contents=[m.to_google_ai_dict() for m in messages],
                 tools=tools,
             ),
-            inner_thoughts_in_kwargs=
+            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )

     elif llm_config.model_endpoint_type == "anthropic":
@@ -260,32 +249,32 @@ def create(
             ),
         )

-    elif llm_config.model_endpoint_type == "cohere":
-    [25 further removed lines of the old Cohere branch; their content is not preserved in the source diff]
+    # elif llm_config.model_endpoint_type == "cohere":
+    #     if stream:
+    #         raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+    #     if not use_tool_naming:
+    #         raise NotImplementedError("Only tool calling supported on Cohere API requests")
+    #
+    #     if functions is not None:
+    #         tools = [{"type": "function", "function": f} for f in functions]
+    #         tools = [Tool(**t) for t in tools]
+    #     else:
+    #         tools = None
+    #
+    #     return cohere_chat_completions_request(
+    #         # url=llm_config.model_endpoint,
+    #         url="https://api.cohere.ai/v1",  # TODO
+    #         api_key=os.getenv("COHERE_API_KEY"),  # TODO remove
+    #         chat_completion_request=ChatCompletionRequest(
+    #             model="command-r-plus",  # TODO
+    #             messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
+    #             tools=tools,
+    #             tool_choice=function_call,
+    #             # user=str(user_id),
+    #             # NOTE: max_tokens is required for Anthropic API
+    #             # max_tokens=1024,  # TODO make dynamic
+    #         ),
+    #     )

     elif llm_config.model_endpoint_type == "groq":
         if stream:
@@ -295,8 +284,7 @@ def create(
             raise ValueError(f"Groq key is missing from letta config file")

         # force to true for groq, since they don't support 'content' is non-null
-
-        if inner_thoughts_in_kwargs:
+        if llm_config.put_inner_thoughts_in_kwargs:
             functions = add_inner_thoughts_to_functions(
                 functions=functions,
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -306,7 +294,7 @@ def create(
         tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
         data = ChatCompletionRequest(
             model=llm_config.model,
-            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=
+            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs) for m in messages],
             tools=tools,
             tool_choice=function_call,
             user=str(user_id),
@@ -335,7 +323,7 @@ def create(
         if isinstance(stream_interface, AgentChunkStreamingInterface):
             stream_interface.stream_end()

-        if
+        if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
letta/llm_api/openai.py
CHANGED
@@ -18,8 +18,13 @@ from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.openai.chat_completion_request import (
-
+    FunctionCall as ToolFunctionChoiceFunctionCall,
+)
+from letta.schemas.openai.chat_completion_request import (
+    Tool,
+    ToolFunctionChoice,
     cast_message_to_subtype,
 )
 from letta.schemas.openai.chat_completion_response import (
@@ -100,15 +105,14 @@ def openai_get_model_list(

 def build_openai_chat_completions_request(
     llm_config: LLMConfig,
-    messages: List[
+    messages: List[_Message],
     user_id: Optional[str],
     functions: Optional[list],
-    function_call: str,
+    function_call: Optional[str],
     use_tool_naming: bool,
-    inner_thoughts_in_kwargs: bool,
     max_tokens: Optional[int],
 ) -> ChatCompletionRequest:
-    if
+    if llm_config.put_inner_thoughts_in_kwargs:
         functions = add_inner_thoughts_to_functions(
             functions=functions,
             inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -116,7 +120,7 @@ def build_openai_chat_completions_request(
         )

     openai_message_list = [
-        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=
+        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)) for m in messages
     ]
     if llm_config.model:
         model = llm_config.model
@@ -125,11 +129,17 @@ def build_openai_chat_completions_request(
         model = None

     if use_tool_naming:
+        if function_call is None:
+            tool_choice = None
+        elif function_call not in ["none", "auto", "required"]:
+            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+        else:
+            tool_choice = function_call
         data = ChatCompletionRequest(
             model=model,
             messages=openai_message_list,
-            tools=[
-            tool_choice=
+            tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+            tool_choice=tool_choice,
             user=str(user_id),
             max_tokens=max_tokens,
         )
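The new tool_choice branch above maps the incoming function_call value onto OpenAI's tool_choice field: None passes through, the "none"/"auto"/"required" literals pass through unchanged, and any other string is treated as the name of one specific function to force. On the wire that amounts to the following (sketch; the real code builds ToolFunctionChoice pydantic models rather than raw dicts):

def map_tool_choice(function_call):
    if function_call is None:
        return None
    if function_call not in ("none", "auto", "required"):
        # a concrete name forces that one tool
        return {"type": "function", "function": {"name": function_call}}
    return function_call

assert map_tool_choice(None) is None
assert map_tool_choice("auto") == "auto"
assert map_tool_choice("send_message") == {"type": "function", "function": {"name": "send_message"}}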
letta/local_llm/llm_chat_completion_wrappers/chatml.py
CHANGED
@@ -188,7 +188,7 @@ class ChatMLInnerMonologueWrapper(LLMChatCompletionWrapper):
             try:
                 # indent the function replies
                 function_return_dict = json_loads(message["content"])
-                function_return_str = json_dumps(function_return_dict, indent=
+                function_return_str = json_dumps(function_return_dict, indent=0)
             except:
                 function_return_str = message["content"]

letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py
CHANGED
@@ -183,7 +183,7 @@ class ConfigurableJSONWrapper(LLMChatCompletionWrapper):
             try:
                 # indent the function replies
                 function_return_dict = json_loads(message["content"])
-                function_return_str = json_dumps(function_return_dict, indent=
+                function_return_str = json_dumps(function_return_dict, indent=0)
             except:
                 function_return_str = message["content"]
letta/local_llm/utils.py
CHANGED
@@ -1,6 +1,6 @@
 import os
 import warnings
-from typing import List
+from typing import List, Union

 import requests
 import tiktoken
@@ -11,6 +11,7 @@ import letta.local_llm.llm_chat_completion_wrappers.configurable_wrapper as configurable_wrapper
 import letta.local_llm.llm_chat_completion_wrappers.dolphin as dolphin
 import letta.local_llm.llm_chat_completion_wrappers.llama3 as llama3
 import letta.local_llm.llm_chat_completion_wrappers.zephyr as zephyr
+from letta.schemas.openai.chat_completion_request import Tool, ToolCall


 def post_json_auth_request(uri, json_payload, auth_type, auth_key):
@@ -123,7 +124,7 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
     return num_tokens


-def num_tokens_from_tool_calls(tool_calls: List[dict], model: str = "gpt-4"):
+def num_tokens_from_tool_calls(tool_calls: Union[List[dict], List[ToolCall]], model: str = "gpt-4"):
     """Based on above code (num_tokens_from_functions).

     Example to encode:
@@ -144,10 +145,25 @@ def num_tokens_from_tool_calls(tool_calls: List[dict], model: str = "gpt-4"):

     num_tokens = 0
     for tool_call in tool_calls:
-    [4 removed lines; their content is not preserved in the source diff]
+        if isinstance(tool_call, dict):
+            tool_call_id = tool_call["id"]
+            tool_call_type = tool_call["type"]
+            tool_call_function = tool_call["function"]
+            tool_call_function_name = tool_call_function["name"]
+            tool_call_function_arguments = tool_call_function["arguments"]
+        elif isinstance(tool_call, Tool):
+            tool_call_id = tool_call.id
+            tool_call_type = tool_call.type
+            tool_call_function = tool_call.function
+            tool_call_function_name = tool_call_function.name
+            tool_call_function_arguments = tool_call_function.arguments
+        else:
+            raise ValueError(f"Unknown tool call type: {type(tool_call)}")
+
+        function_tokens = len(encoding.encode(tool_call_id))
+        function_tokens += 2 + len(encoding.encode(tool_call_type))
+        function_tokens += 2 + len(encoding.encode(tool_call_function_name))
+        function_tokens += 2 + len(encoding.encode(tool_call_function_arguments))

         num_tokens += function_tokens
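num_tokens_from_tool_calls now accepts pydantic objects alongside raw dicts, extracting the same five fields from either shape (note the runtime check in the hunk is isinstance(tool_call, Tool) even though the annotation says List[ToolCall]). A usage sketch with the dict shape and an invented payload:

from letta.local_llm.utils import num_tokens_from_tool_calls

tool_call = {
    "id": "call_abc123",  # invented example values
    "type": "function",
    "function": {"name": "send_message", "arguments": '{"message": "hi"}'},
}
print(num_tokens_from_tool_calls(tool_calls=[tool_call], model="gpt-4"))
# -> a small int; the exact value depends on the tokenizer for `model`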
letta/main.py
CHANGED
@@ -20,7 +20,6 @@ from letta.cli.cli_load import app as load_app
 from letta.config import LettaConfig
 from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.metadata import MetadataStore
-from letta.schemas.enums import OptionState

 # from letta.interface import CLIInterface as interface  # for printing to terminal
 from letta.streaming_interface import AgentRefreshStreamingInterface
@@ -64,7 +63,6 @@ def run_agent_loop(
     no_verify: bool = False,
     strip_ui: bool = False,
     stream: bool = False,
-    inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
 ):
     if isinstance(letta_agent.interface, AgentRefreshStreamingInterface):
         # letta_agent.interface.toggle_streaming(on=stream)
@@ -369,7 +367,6 @@ def run_agent_loop(
                     first_message=False,
                     skip_verify=no_verify,
                     stream=stream,
-                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
                     ms=ms,
                 )
             else:
@@ -378,7 +375,6 @@ def run_agent_loop(
                     first_message=False,
                     skip_verify=no_verify,
                     stream=stream,
-                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
                     ms=ms,
                 )
                 new_messages = step_response.messages