letta-nightly 0.6.27.dev20250220104103__py3-none-any.whl → 0.6.29.dev20250221033538__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +19 -2
- letta/client/client.py +2 -0
- letta/constants.py +2 -0
- letta/functions/schema_generator.py +6 -6
- letta/helpers/converters.py +153 -0
- letta/helpers/tool_rule_solver.py +11 -1
- letta/llm_api/anthropic.py +10 -5
- letta/llm_api/aws_bedrock.py +1 -1
- letta/llm_api/deepseek.py +303 -0
- letta/llm_api/helpers.py +20 -10
- letta/llm_api/llm_api_tools.py +85 -2
- letta/llm_api/openai.py +16 -1
- letta/local_llm/chat_completion_proxy.py +15 -2
- letta/local_llm/lmstudio/api.py +75 -1
- letta/orm/__init__.py +2 -0
- letta/orm/agent.py +11 -4
- letta/orm/custom_columns.py +31 -110
- letta/orm/identities_agents.py +13 -0
- letta/orm/identity.py +60 -0
- letta/orm/organization.py +2 -0
- letta/orm/sqlalchemy_base.py +4 -0
- letta/schemas/agent.py +11 -1
- letta/schemas/identity.py +67 -0
- letta/schemas/llm_config.py +2 -0
- letta/schemas/message.py +1 -1
- letta/schemas/openai/chat_completion_response.py +2 -0
- letta/schemas/providers.py +72 -1
- letta/schemas/tool_rule.py +9 -1
- letta/serialize_schemas/__init__.py +1 -0
- letta/serialize_schemas/agent.py +36 -0
- letta/serialize_schemas/base.py +12 -0
- letta/serialize_schemas/custom_fields.py +69 -0
- letta/serialize_schemas/message.py +15 -0
- letta/server/db.py +111 -0
- letta/server/rest_api/app.py +8 -0
- letta/server/rest_api/chat_completions_interface.py +45 -21
- letta/server/rest_api/interface.py +114 -9
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +98 -24
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +14 -3
- letta/server/rest_api/routers/v1/identities.py +121 -0
- letta/server/rest_api/utils.py +183 -4
- letta/server/server.py +23 -117
- letta/services/agent_manager.py +53 -6
- letta/services/block_manager.py +1 -1
- letta/services/identity_manager.py +156 -0
- letta/services/job_manager.py +1 -1
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +1 -1
- letta/services/step_manager.py +1 -1
- letta/services/tool_manager.py +1 -1
- letta/services/user_manager.py +1 -1
- letta/settings.py +3 -0
- letta/streaming_interface.py +6 -2
- letta/tracing.py +205 -0
- letta/utils.py +4 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/METADATA +9 -2
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/RECORD +66 -52
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/entry_points.txt +0 -0
letta/llm_api/deepseek.py
ADDED

@@ -0,0 +1,303 @@
+import json
+import re
+import warnings
+from typing import List, Optional
+
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message as _Message
+from letta.schemas.openai.chat_completion_request import AssistantMessage, ChatCompletionRequest, ChatMessage
+from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
+from letta.schemas.openai.chat_completion_request import Tool, ToolFunctionChoice, ToolMessage, UserMessage, cast_message_to_subtype
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.openai import Function, ToolCall
+from letta.utils import get_tool_call_id
+
+
+def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage:
+    """
+    Merge `ToolMessage` objects into the previous message.
+    """
+    previous_message.content += (
+        f"<ToolMessage> content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}</ToolMessage>"
+    )
+    return previous_message
+
+
+def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage:
+    """
+    For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field.
+    """
+
+    if "tool_calls" in assistant_message.dict().keys():
+        assistant_message.content = "".join(
+            [
+                # f"<ToolCall> name: {tool_call.function.name}, function: {tool_call.function}</ToolCall>"
+                f"<ToolCall> {json.dumps(tool_call.function.dict())} </ToolCall>"
+                for tool_call in assistant_message.tool_calls
+            ]
+        )
+        del assistant_message.tool_calls
+    return assistant_message
+
+
+def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
+    """
+    Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
+    Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
+
+    This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message
+    at the end.
+
+    """
+    deepseek_messages = []
+    for idx, message in enumerate(messages):
+        # First message is the system prompt, add it
+        if idx == 0 and message.role == "system":
+            deepseek_messages.append(message)
+            continue
+        if message.role == "user":
+            if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system":
+                # User message, add it
+                deepseek_messages.append(UserMessage(content=message.content))
+            else:
+                # add to the content of the previous message
+                deepseek_messages[-1].content += message.content
+        elif message.role == "assistant":
+            if deepseek_messages[-1].role == "user":
+                # Assistant message, remove tool calls and add them to the content
+                deepseek_messages.append(handle_assistant_message(message))
+            else:
+                # add to the content of the previous message
+                deepseek_messages[-1].content += message.content
+        elif message.role == "tool" and deepseek_messages[-1].role == "assistant":
+            # Tool message, add it to the last assistant message
+            merged_message = merge_tool_message(deepseek_messages[-1], message)
+            deepseek_messages[-1] = merged_message
+        else:
+            print(f"Skipping message: {message}")
+
+    # This needs to end on a user message, add a dummy message if the last was assistant
+    if deepseek_messages[-1].role == "assistant":
+        deepseek_messages.append(UserMessage(content=""))
+    return deepseek_messages
+
+
+def build_deepseek_chat_completions_request(
+    llm_config: LLMConfig,
+    messages: List[_Message],
+    user_id: Optional[str],
+    functions: Optional[list],
+    function_call: Optional[str],
+    use_tool_naming: bool,
+    max_tokens: Optional[int],
+) -> ChatCompletionRequest:
+    # if functions and llm_config.put_inner_thoughts_in_kwargs:
+    #     # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+    #     # TODO(fix)
+    #     inner_thoughts_desc = (
+    #         INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+    #     )
+    #     functions = add_inner_thoughts_to_functions(
+    #         functions=functions,
+    #         inner_thoughts_key=INNER_THOUGHTS_KWARG,
+    #         inner_thoughts_description=inner_thoughts_desc,
+    #     )
+
+    openai_message_list = [cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=False)) for m in messages]
+
+    if llm_config.model:
+        model = llm_config.model
+    else:
+        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        model = None
+    if use_tool_naming:
+        if function_call is None:
+            tool_choice = None
+        elif function_call not in ["none", "auto", "required"]:
+            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+        else:
+            tool_choice = function_call
+
+        def add_functions_to_system_message(system_message: ChatMessage):
+            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+            system_message.content += f'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+
+        if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
+            add_functions_to_system_message(
+                openai_message_list[0]
+            )  # Inject additional instructions to the system prompt with the available functions
+
+            openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+            data = ChatCompletionRequest(
+                model=model,
+                messages=openai_message_list,
+                user=str(user_id),
+                max_completion_tokens=max_tokens,
+                temperature=llm_config.temperature,
+            )
+        else:
+            data = ChatCompletionRequest(
+                model=model,
+                messages=openai_message_list,
+                tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+                tool_choice=tool_choice,
+                user=str(user_id),
+                max_completion_tokens=max_tokens,
+                temperature=llm_config.temperature,
+            )
+    else:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            functions=functions,
+            function_call=function_call,
+            user=str(user_id),
+            max_completion_tokens=max_tokens,
+            temperature=llm_config.temperature,
+        )
+
+    return data
+
+
+def convert_deepseek_response_to_chatcompletion(
+    response: ChatCompletionResponse,
+) -> ChatCompletionResponse:
+    """
+    Example response from DeepSeek:
+
+    ChatCompletion(
+        id='bc7f7d25-82e4-443a-b217-dfad2b66da8e',
+        choices=[
+            Choice(
+                finish_reason='stop',
+                index=0,
+                logprobs=None,
+                message=ChatCompletionMessage(
+                    content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}',
+                    refusal=None,
+                    role='assistant',
+                    audio=None,
+                    function_call=None,
+                    tool_calls=None,
+                    reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.'
+                )
+            )
+        ],
+        created=1738266449,
+        model='deepseek-reasoner',
+        object='chat.completion',
+        service_tier=None,
+        system_fingerprint='fp_7e73fd9a08',
+        usage=CompletionUsage(
+            completion_tokens=111,
+            prompt_tokens=1270,
+            total_tokens=1381,
+            completion_tokens_details=CompletionTokensDetails(
+                accepted_prediction_tokens=None,
+                audio_tokens=None,
+                reasoning_tokens=72,
+                rejected_prediction_tokens=None
+            ),
+            prompt_tokens_details=PromptTokensDetails(
+                audio_tokens=None,
+                cached_tokens=1088
+            ),
+            prompt_cache_hit_tokens=1088,
+            prompt_cache_miss_tokens=182
+        )
+    )
+    """
+
+    def convert_dict_quotes(input_dict: dict):
+        """
+        Convert a dictionary with single-quoted keys to double-quoted keys,
+        properly handling boolean values and nested structures.
+
+        Args:
+            input_dict (dict): Input dictionary with single-quoted keys
+
+        Returns:
+            str: JSON string with double-quoted keys
+        """
+        # First convert the dictionary to a JSON string to handle booleans properly
+        json_str = json.dumps(input_dict)
+
+        # Function to handle complex string replacements
+        def replace_quotes(match):
+            key = match.group(1)
+            # Escape any existing double quotes in the key
+            key = key.replace('"', '\\"')
+            return f'"{key}":'
+
+        # Replace single-quoted keys with double-quoted keys
+        # This regex looks for single-quoted keys followed by a colon
+        def strip_json_block(text):
+            # Check if text starts with ```json or similar
+            if text.strip().startswith("```"):
+                # Split by \n to remove the first and last lines
+                lines = text.split("\n")[1:-1]
+                return "\n".join(lines)
+            return text
+
+        pattern = r"'([^']*)':"
+        converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str))
+
+        # Parse the string back to ensure valid JSON format
+        try:
+            json.loads(converted_str)
+            return converted_str
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}")
+
+    def extract_json_block(text):
+        # Find the first {
+        start = text.find("{")
+        if start == -1:
+            return text
+
+        # Track nested braces to find the matching closing brace
+        brace_count = 0
+        end = start
+
+        for i in range(start, len(text)):
+            if text[i] == "{":
+                brace_count += 1
+            elif text[i] == "}":
+                brace_count -= 1
+                if brace_count == 0:
+                    end = i + 1
+                    break
+
+        return text[start:end]
+
+    content = response.choices[0].message.content
+    try:
+        content_dict = json.loads(extract_json_block(content))
+
+        if type(content_dict["arguments"]) == str:
+            content_dict["arguments"] = json.loads(content_dict["arguments"])
+
+        tool_calls = [
+            ToolCall(
+                id=get_tool_call_id(),
+                type="function",
+                function=Function(
+                    name=content_dict["name"],
+                    arguments=convert_dict_quotes(content_dict["arguments"]),
+                ),
+            )
+        ]
+    except (json.JSONDecodeError, TypeError, KeyError) as e:
+        print(e)
+        tool_calls = response.choices[0].message.tool_calls
+        raise ValueError(f"Failed to create valid JSON {content}")
+
+    # Move the "reasoning_content" into the "content" field
+    response.choices[0].message.content = response.choices[0].message.reasoning_content
+    response.choices[0].message.tool_calls = tool_calls
+
+    # Remove the "reasoning_content" field
+    response.choices[0].message.reasoning_content = None
+
+    return response
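For readers skimming the new module: convert_deepseek_response_to_chatcompletion depends on pulling the first balanced JSON object out of free-form R1 output. Below is a minimal standalone sketch of that brace-matching idea, re-implemented outside letta so it can be run directly; the helper name and the sample content string are illustrative, not taken from the package.

    import json

    def extract_first_json_object(text: str) -> str:
        # Walk from the first '{', tracking brace depth, so any prose
        # before or after the JSON object is ignored (mirrors the diff above).
        start = text.find("{")
        if start == -1:
            return text
        depth = 0
        end = start
        for i in range(start, len(text)):
            if text[i] == "{":
                depth += 1
            elif text[i] == "}":
                depth -= 1
                if depth == 0:
                    end = i + 1
                    break
        return text[start:end]

    # Invented example of R1-style output: a function-call JSON block wrapped in extra prose.
    content = 'Sure. {"name": "send_message", "arguments": {"message": "hi"}} Done.'
    call = json.loads(extract_first_json_object(content))
    print(call["name"], call["arguments"])  # send_message {'message': 'hi'}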
letta/llm_api/helpers.py
CHANGED

@@ -202,21 +202,29 @@ def add_inner_thoughts_to_functions(
     inner_thoughts_key: str,
     inner_thoughts_description: str,
     inner_thoughts_required: bool = True,
+    put_inner_thoughts_first: bool = True,
 ) -> List[dict]:
     """Add an inner_thoughts kwarg to every function in the provided list, ensuring it's the first parameter"""
     new_functions = []
     for function_object in functions:
         new_function_object = copy.deepcopy(function_object)
-
-        # Create a new OrderedDict with inner_thoughts as the first item
         new_properties = OrderedDict()
-        new_properties[inner_thoughts_key] = {
-            "type": "string",
-            "description": inner_thoughts_description,
-        }
 
-        #
-
+        # For chat completions, we want inner thoughts to come later
+        if put_inner_thoughts_first:
+            # Create with inner_thoughts as the first item
+            new_properties[inner_thoughts_key] = {
+                "type": "string",
+                "description": inner_thoughts_description,
+            }
+            # Add the rest of the properties
+            new_properties.update(function_object["parameters"]["properties"])
+        else:
+            new_properties.update(function_object["parameters"]["properties"])
+            new_properties[inner_thoughts_key] = {
+                "type": "string",
+                "description": inner_thoughts_description,
+            }
 
         # Cast OrderedDict back to a regular dict
         new_function_object["parameters"]["properties"] = dict(new_properties)

@@ -225,9 +233,11 @@ def add_inner_thoughts_to_functions(
         if inner_thoughts_required:
             required_params = new_function_object["parameters"].get("required", [])
             if inner_thoughts_key not in required_params:
-                required_params.insert(0, inner_thoughts_key)
+                if put_inner_thoughts_first:
+                    required_params.insert(0, inner_thoughts_key)
+                else:
+                    required_params.append(inner_thoughts_key)
                 new_function_object["parameters"]["required"] = required_params
-
         new_functions.append(new_function_object)
 
     return new_functions
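The new put_inner_thoughts_first flag only changes where the inner-thoughts parameter lands in a tool's JSON-schema properties (and in its required list). Here is a small standalone sketch of that ordering behavior, with an invented schema and helper name rather than letta's actual function:

    from collections import OrderedDict

    def place_inner_thoughts(properties: dict, key: str, description: str, first: bool) -> dict:
        # Build an OrderedDict with the inner-thoughts key either before or
        # after the existing parameters, then cast back to a plain dict.
        new_properties = OrderedDict()
        if first:
            new_properties[key] = {"type": "string", "description": description}
            new_properties.update(properties)
        else:
            new_properties.update(properties)
            new_properties[key] = {"type": "string", "description": description}
        return dict(new_properties)

    params = {"message": {"type": "string", "description": "What to send"}}
    print(list(place_inner_thoughts(params, "inner_thoughts", "private reasoning", True)))
    # ['inner_thoughts', 'message']
    print(list(place_inner_thoughts(params, "inner_thoughts", "private reasoning", False)))
    # ['message', 'inner_thoughts']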
letta/llm_api/llm_api_tools.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 import random
 import time
 from typing import List, Optional, Union

@@ -13,6 +14,7 @@ from letta.llm_api.anthropic import (
 )
 from letta.llm_api.aws_bedrock import has_valid_aws_credentials
 from letta.llm_api.azure_openai import azure_openai_chat_completions_request
+from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
 from letta.llm_api.google_ai import convert_tools_to_google_ai_format, google_ai_chat_completions_request
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.openai import (

@@ -29,8 +31,9 @@ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest,
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
+from letta.tracing import log_event, trace_method
 
-LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
+LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq", "deepseek"]
 
 
 def retry_with_exponential_backoff(

@@ -68,9 +71,28 @@ def retry_with_exponential_backoff(
                 if http_err.response.status_code in error_codes:
                     # Increment retries
                     num_retries += 1
+                    log_event(
+                        "llm_retry_attempt",
+                        {
+                            "attempt": num_retries,
+                            "delay": delay,
+                            "status_code": http_err.response.status_code,
+                            "error_type": type(http_err).__name__,
+                            "error": str(http_err),
+                        },
+                    )
 
                     # Check if max retries has been reached
                     if num_retries > max_retries:
+                        log_event(
+                            "llm_max_retries_exceeded",
+                            {
+                                "max_retries": max_retries,
+                                "status_code": http_err.response.status_code,
+                                "error_type": type(http_err).__name__,
+                                "error": str(http_err),
+                            },
+                        )
                         raise RateLimitExceededError("Maximum number of retries exceeded", max_retries=max_retries)
 
                     # Increment the delay

@@ -84,15 +106,21 @@ def retry_with_exponential_backoff(
                     time.sleep(delay)
                 else:
                     # For other HTTP errors, re-raise the exception
+                    log_event(
+                        "llm_non_retryable_error",
+                        {"status_code": http_err.response.status_code, "error_type": type(http_err).__name__, "error": str(http_err)},
+                    )
                     raise
 
             # Raise exceptions for any errors not specified
             except Exception as e:
+                log_event("llm_unexpected_error", {"error_type": type(e).__name__, "error": str(e)})
                 raise e
 
     return wrapper
 
 
+@trace_method("LLM Request")
 @retry_with_exponential_backoff
 def create(
     # agent_state: AgentState,

@@ -112,6 +140,7 @@ def create(
     stream: bool = False,
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
+    put_inner_thoughts_first: bool = True,
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd

@@ -157,7 +186,9 @@ def create(
         else:
             function_call = "required"
 
-        data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming)
+        data = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, put_inner_thoughts_first=put_inner_thoughts_first
+        )
         if stream:  # Client requested token streaming
             data.stream = True
             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(

@@ -453,10 +484,62 @@ def create(
             ),
         )
 
+    elif llm_config.model_endpoint_type == "deepseek":
+        if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
+            # only is a problem if we are *not* using an openai proxy
+            raise LettaConfigurationError(message="DeepSeek key is missing from letta config file", missing_fields=["deepseek_api_key"])
+
+        data = build_deepseek_chat_completions_request(
+            llm_config,
+            messages,
+            user_id,
+            functions,
+            function_call,
+            use_tool_naming,
+            llm_config.max_tokens,
+        )
+        if stream:  # Client requested token streaming
+            data.stream = True
+            assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
+                stream_interface, AgentRefreshStreamingInterface
+            ), type(stream_interface)
+            response = openai_chat_completions_process_stream(
+                url=llm_config.model_endpoint,
+                api_key=model_settings.deepseek_api_key,
+                chat_completion_request=data,
+                stream_interface=stream_interface,
+            )
+        else:  # Client did not request token streaming (expect a blocking backend response)
+            data.stream = False
+            if isinstance(stream_interface, AgentChunkStreamingInterface):
+                stream_interface.stream_start()
+            try:
+                response = openai_chat_completions_request(
+                    url=llm_config.model_endpoint,
+                    api_key=model_settings.deepseek_api_key,
+                    chat_completion_request=data,
+                )
+            finally:
+                if isinstance(stream_interface, AgentChunkStreamingInterface):
+                    stream_interface.stream_end()
+        """
+        if llm_config.put_inner_thoughts_in_kwargs:
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+        """
+        response = convert_deepseek_response_to_chatcompletion(response)
+        return response
+
     # local model
     else:
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+
+        if "DeepSeek-R1".lower() in llm_config.model.lower():  # TODO: move this to the llm_config.
+            messages[0].content[0].text += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+            messages[0].content[
+                0
+            ].text += f'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
+
         return get_chat_completion(
             model=llm_config.model,
             messages=messages,
letta/llm_api/openai.py
CHANGED

@@ -94,6 +94,7 @@ def build_openai_chat_completions_request(
     functions: Optional[list],
     function_call: Optional[str],
     use_tool_naming: bool,
+    put_inner_thoughts_first: bool = True,
 ) -> ChatCompletionRequest:
     if functions and llm_config.put_inner_thoughts_in_kwargs:
         # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first

@@ -105,6 +106,7 @@ def build_openai_chat_completions_request(
             functions=functions,
             inner_thoughts_key=INNER_THOUGHTS_KWARG,
             inner_thoughts_description=inner_thoughts_desc,
+            put_inner_thoughts_first=put_inner_thoughts_first,
         )
 
     openai_message_list = [

@@ -166,6 +168,11 @@ def openai_chat_completions_process_stream(
     create_message_id: bool = True,
     create_message_datetime: bool = True,
     override_tool_call_id: bool = True,
+    # if we expect reasoning content in the response,
+    # then we should emit reasoning_content as "inner_thoughts"
+    # however, we don't necessarily want to put these
+    # expect_reasoning_content: bool = False,
+    expect_reasoning_content: bool = True,
 ) -> ChatCompletionResponse:
     """Process a streaming completion response, and return a ChatCompletionRequest at the end.
 

@@ -250,6 +257,7 @@ def openai_chat_completions_process_stream(
                     chat_completion_chunk,
                     message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                     message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+                    expect_reasoning_content=expect_reasoning_content,
                 )
             elif isinstance(stream_interface, AgentRefreshStreamingInterface):
                 stream_interface.process_refresh(chat_completion_response)

@@ -290,6 +298,13 @@ def openai_chat_completions_process_stream(
                        else:
                            accum_message.content += content_delta
 
+                    if expect_reasoning_content and message_delta.reasoning_content is not None:
+                        reasoning_content_delta = message_delta.reasoning_content
+                        if accum_message.reasoning_content is None:
+                            accum_message.reasoning_content = reasoning_content_delta
+                        else:
+                            accum_message.reasoning_content += reasoning_content_delta
+
                     # TODO(charles) make sure this works for parallel tool calling?
                     if message_delta.tool_calls is not None:
                         tool_calls_delta = message_delta.tool_calls

@@ -377,7 +392,7 @@ def openai_chat_completions_process_stream(
     chat_completion_response.usage.completion_tokens = n_chunks
     chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
 
-    assert len(chat_completion_response.choices) > 0, chat_completion_response
+    assert len(chat_completion_response.choices) > 0, f"No response from provider {chat_completion_response}"
 
     # printd(chat_completion_response)
     return chat_completion_response
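The streaming change above boils down to accumulating an optional reasoning_content delta alongside the ordinary content delta. A standalone sketch of that accumulation pattern, using invented delta objects rather than the OpenAI client types:

    from dataclasses import dataclass
    from typing import List, Optional, Tuple

    @dataclass
    class Delta:
        content: Optional[str] = None
        reasoning_content: Optional[str] = None

    def accumulate(deltas: List[Delta]) -> Tuple[Optional[str], Optional[str]]:
        # Concatenate content and reasoning_content separately, starting from None,
        # as the stream-processing loop in the diff above does.
        content, reasoning = None, None
        for d in deltas:
            if d.content is not None:
                content = d.content if content is None else content + d.content
            if d.reasoning_content is not None:
                reasoning = d.reasoning_content if reasoning is None else reasoning + d.reasoning_content
        return content, reasoning

    chunks = [
        Delta(reasoning_content="Thinking about whales... "),
        Delta(reasoning_content="they are mammals."),
        Delta(content="Whales are mammals, not fish."),
    ]
    print(accumulate(chunks))
    # ('Whales are mammals, not fish.', 'Thinking about whales... they are mammals.')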
letta/local_llm/chat_completion_proxy.py
CHANGED

@@ -14,7 +14,7 @@ from letta.local_llm.grammars.gbnf_grammar_generator import create_dynamic_model
 from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
 from letta.local_llm.llamacpp.api import get_llamacpp_completion
 from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
-from letta.local_llm.lmstudio.api import get_lmstudio_completion
+from letta.local_llm.lmstudio.api import get_lmstudio_completion, get_lmstudio_completion_chatcompletions
 from letta.local_llm.ollama.api import get_ollama_completion
 from letta.local_llm.utils import count_tokens, get_available_wrappers
 from letta.local_llm.vllm.api import get_vllm_completion

@@ -141,11 +141,24 @@ def get_chat_completion(
             f"Failed to convert ChatCompletion messages into prompt string with wrapper {str(llm_wrapper)} - error: {str(e)}"
         )
 
+    # get the schema for the model
+
+    """
+    if functions_python is not None:
+        model_schema = generate_schema(functions)
+    else:
+        model_schema = None
+    """
+
+    # Run the LLM
     try:
+        result_reasoning = None
         if endpoint_type == "webui":
             result, usage = get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
         elif endpoint_type == "webui-legacy":
             result, usage = get_webui_completion_legacy(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+        elif endpoint_type == "lmstudio-chatcompletions":
+            result, usage, result_reasoning = get_lmstudio_completion_chatcompletions(endpoint, auth_type, auth_key, model, messages)
         elif endpoint_type == "lmstudio":
             result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions")
         elif endpoint_type == "lmstudio-legacy":

@@ -214,7 +227,7 @@ def get_chat_completion(
             index=0,
             message=Message(
                 role=chat_completion_result["role"],
-                content=chat_completion_result["content"],
+                content=result_reasoning if result_reasoning is not None else chat_completion_result["content"],
                 tool_calls=(
                     [ToolCall(id=get_tool_call_id(), type="function", function=chat_completion_result["function_call"])]
                     if "function_call" in chat_completion_result