letta-nightly 0.5.0.dev20241021104213__py3-none-any.whl → 0.5.0.dev20241022104124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic; see the advisory details below.

letta/llm_api/helpers.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import copy
2
2
  import json
3
3
  import warnings
4
+ from collections import OrderedDict
4
5
  from typing import Any, List, Union
5
6
 
6
7
  import requests
@@ -10,6 +11,30 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
10
11
  from letta.utils import json_dumps, printd
11
12
 
12
13
 
14
def convert_to_structured_output(openai_function: dict) -> dict:
    """Convert an OpenAI function-call schema into a strict structured-output schema.

    See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas

    Args:
        openai_function: A function definition in the OpenAI tool/function format,
            containing "name", "description", and "parameters" keys.

    Returns:
        A new dict with "strict": True, "additionalProperties": False, and every
        property listed in "required" (all three are mandated by OpenAI's
        structured-outputs strict mode).

    Raises:
        KeyError: If the input is missing "name", "description", or a property's
            "type"/"description" — assumes well-formed OpenAI schemas; TODO confirm
            all callers guarantee a description on every parameter.
    """
    structured_output = {
        "name": openai_function["name"],
        "description": openai_function["description"],
        # strict mode is required for guaranteed schema adherence
        "strict": True,
        "parameters": {"type": "object", "properties": {}, "additionalProperties": False, "required": []},
    }

    for param, details in openai_function["parameters"]["properties"].items():
        structured_output["parameters"]["properties"][param] = {
            "type": details["type"],
            "description": details["description"],
        }

        # NOTE: this check must run once per parameter (inside the loop);
        # hoisting it out would only ever inspect the last-iterated parameter.
        if "enum" in details:
            structured_output["parameters"]["properties"][param]["enum"] = details["enum"]

    # Structured outputs (strict mode) require every property to be marked required.
    structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys())

    return structured_output
36
+
37
+
13
38
  def make_post_request(url: str, headers: dict[str, str], data: dict[str, Any]) -> dict[str, Any]:
14
39
  printd(f"Sending request to {url}")
15
40
  try:
@@ -78,33 +103,34 @@ def add_inner_thoughts_to_functions(
78
103
  inner_thoughts_key: str,
79
104
  inner_thoughts_description: str,
80
105
  inner_thoughts_required: bool = True,
81
- # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
82
106
  ) -> List[dict]:
83
- """Add an inner_thoughts kwarg to every function in the provided list"""
84
- # return copies
107
+ """Add an inner_thoughts kwarg to every function in the provided list, ensuring it's the first parameter"""
85
108
  new_functions = []
86
-
87
- # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
88
109
  for function_object in functions:
89
- function_params = function_object["parameters"]["properties"]
90
- required_params = list(function_object["parameters"]["required"])
110
+ new_function_object = copy.deepcopy(function_object)
91
111
 
92
- # if the inner thoughts arg doesn't exist, add it
93
- if inner_thoughts_key not in function_params:
94
- function_params[inner_thoughts_key] = {
95
- "type": "string",
96
- "description": inner_thoughts_description,
97
- }
112
+ # Create a new OrderedDict with inner_thoughts as the first item
113
+ new_properties = OrderedDict()
114
+ new_properties[inner_thoughts_key] = {
115
+ "type": "string",
116
+ "description": inner_thoughts_description,
117
+ }
98
118
 
99
- # make sure it's tagged as required
100
- new_function_object = copy.deepcopy(function_object)
101
- if inner_thoughts_required and inner_thoughts_key not in required_params:
102
- required_params.append(inner_thoughts_key)
103
- new_function_object["parameters"]["required"] = required_params
119
+ # Add the rest of the properties
120
+ new_properties.update(function_object["parameters"]["properties"])
121
+
122
+ # Cast OrderedDict back to a regular dict
123
+ new_function_object["parameters"]["properties"] = dict(new_properties)
124
+
125
+ # Update required parameters if necessary
126
+ if inner_thoughts_required:
127
+ required_params = new_function_object["parameters"].get("required", [])
128
+ if inner_thoughts_key not in required_params:
129
+ required_params.insert(0, inner_thoughts_key)
130
+ new_function_object["parameters"]["required"] = required_params
104
131
 
105
132
  new_functions.append(new_function_object)
106
133
 
107
- # return a list of copies
108
134
  return new_functions
109
135
 
110
136
 
letta/llm_api/openai.py CHANGED
@@ -9,7 +9,11 @@ from httpx_sse._exceptions import SSEError
9
9
 
10
10
  from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
11
11
  from letta.errors import LLMError
12
- from letta.llm_api.helpers import add_inner_thoughts_to_functions, make_post_request
12
+ from letta.llm_api.helpers import (
13
+ add_inner_thoughts_to_functions,
14
+ convert_to_structured_output,
15
+ make_post_request,
16
+ )
13
17
  from letta.local_llm.constants import (
14
18
  INNER_THOUGHTS_KWARG,
15
19
  INNER_THOUGHTS_KWARG_DESCRIPTION,
@@ -112,7 +116,7 @@ def build_openai_chat_completions_request(
112
116
  use_tool_naming: bool,
113
117
  max_tokens: Optional[int],
114
118
  ) -> ChatCompletionRequest:
115
- if llm_config.put_inner_thoughts_in_kwargs:
119
+ if functions and llm_config.put_inner_thoughts_in_kwargs:
116
120
  functions = add_inner_thoughts_to_functions(
117
121
  functions=functions,
118
122
  inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -154,8 +158,8 @@ def build_openai_chat_completions_request(
154
158
  )
155
159
  # https://platform.openai.com/docs/guides/text-generation/json-mode
156
160
  # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
157
- if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
158
- data.response_format = {"type": "json_object"}
161
+ # if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
162
+ # data.response_format = {"type": "json_object"}
159
163
 
160
164
  if "inference.memgpt.ai" in llm_config.model_endpoint:
161
165
  # override user id for inference.memgpt.ai
@@ -362,6 +366,8 @@ def openai_chat_completions_process_stream(
362
366
  chat_completion_response.usage.completion_tokens = n_chunks
363
367
  chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
364
368
 
369
+ assert len(chat_completion_response.choices) > 0, chat_completion_response
370
+
365
371
  # printd(chat_completion_response)
366
372
  return chat_completion_response
367
373
 
@@ -461,6 +467,13 @@ def openai_chat_completions_request_stream(
461
467
  data.pop("tools")
462
468
  data.pop("tool_choice", None) # extra safe, should exist always (default="auto")
463
469
 
470
+ if "tools" in data:
471
+ for tool in data["tools"]:
472
+ # tool["strict"] = True
473
+ tool["function"] = convert_to_structured_output(tool["function"])
474
+
475
+ # print(f"\n\n\n\nData[tools]: {json.dumps(data['tools'], indent=2)}")
476
+
464
477
  printd(f"Sending request to {url}")
465
478
  try:
466
479
  return _sse_post(url=url, data=data, headers=headers)
@@ -8,6 +8,7 @@ from typing import AsyncGenerator, Literal, Optional, Union
8
8
 
9
9
  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
10
10
  from letta.interface import AgentInterface
11
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
11
12
  from letta.schemas.enums import MessageStreamStatus
12
13
  from letta.schemas.letta_message import (
13
14
  AssistantMessage,
@@ -23,9 +24,14 @@ from letta.schemas.letta_message import (
23
24
  from letta.schemas.message import Message
24
25
  from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
25
26
  from letta.streaming_interface import AgentChunkStreamingInterface
27
+ from letta.streaming_utils import (
28
+ FunctionArgumentsStreamHandler,
29
+ JSONInnerThoughtsExtractor,
30
+ )
26
31
  from letta.utils import is_utc_datetime
27
32
 
28
33
 
34
+ # TODO strip from code / deprecate
29
35
  class QueuingInterface(AgentInterface):
30
36
  """Messages are queued inside an internal buffer and manually flushed"""
31
37
 
@@ -248,58 +254,6 @@ class QueuingInterface(AgentInterface):
248
254
  self._queue_push(message_api=new_message, message_obj=msg_obj)
249
255
 
250
256
 
251
class FunctionArgumentsStreamHandler:
    """State machine that can process a stream of JSON chunks from function-call arguments.

    Incrementally scans streamed fragments of a stringified JSON object, looking for
    the value of ``json_key`` (e.g. the assistant "message" kwarg), and returns the
    plaintext pieces of that value as they arrive while suppressing everything else.
    """

    def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
        # json_key: the JSON key whose string value should be streamed back as plaintext
        self.json_key = json_key
        self.reset()

    def reset(self):
        """Return the state machine to its initial state (before any chunk was seen)."""
        self.in_message = False  # currently inside the value for json_key
        self.key_buffer = ""  # accumulates key characters until json_key is matched
        self.accumulating = False  # currently reading a key (between '{'/',' and ':')
        self.message_started = False  # opening quote of the value has been consumed

    def process_json_chunk(self, chunk: str) -> Optional[str]:
        """Process a chunk from the function arguments and return the plaintext version.

        Returns None for any chunk that is JSON scaffolding (braces, keys, quotes);
        returns the chunk's text when it is part of the json_key value.
        NOTE(review): assumes the stream tokenizes roughly one JSON token per chunk
        (as OpenAI tool-call deltas do) — a '{' or '"' split across chunks would not
        be recognized; confirm against the upstream tokenizer.
        """

        # Use strip to handle only leading and trailing whitespace in control structures
        if self.accumulating:
            clean_chunk = chunk.strip()
            if self.json_key in self.key_buffer:
                # Key matched; the ':' signals the value is next, so switch modes.
                if ":" in clean_chunk:
                    self.in_message = True
                    self.accumulating = False
                return None
            self.key_buffer += clean_chunk
            return None

        if self.in_message:
            # A lone closing quote ends the value once it has started.
            if chunk.strip() == '"' and self.message_started:
                self.in_message = False
                self.message_started = False
                return None
            # A lone opening quote starts the value; emit nothing yet.
            if not self.message_started and chunk.strip() == '"':
                self.message_started = True
                return None
            if self.message_started:
                # Value text may carry its own terminating quote; strip it off.
                if chunk.strip().endswith('"'):
                    self.in_message = False
                    return chunk.rstrip('"\n')
                return chunk

        # Top-level scaffolding: object open starts key accumulation, object close resets.
        if chunk.strip() == "{":
            self.key_buffer = ""
            self.accumulating = True
            return None
        if chunk.strip() == "}":
            self.in_message = False
            self.message_started = False
            return None
        return None
301
-
302
-
303
257
  class StreamingServerInterface(AgentChunkStreamingInterface):
304
258
  """Maintain a generator that is a proxy for self.process_chunk()
305
259
 
@@ -316,9 +270,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
316
270
  def __init__(
317
271
  self,
318
272
  multi_step=True,
273
+ # Related to if we want to try and pass back the AssistantMessage as a special case function
319
274
  use_assistant_message=False,
320
275
  assistant_message_function_name=DEFAULT_MESSAGE_TOOL,
321
276
  assistant_message_function_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
277
+ # Related to if we expect inner_thoughts to be in the kwargs
278
+ inner_thoughts_in_kwargs=True,
279
+ inner_thoughts_kwarg=INNER_THOUGHTS_KWARG,
322
280
  ):
323
281
  # If streaming mode, ignores base interface calls like .assistant_message, etc
324
282
  self.streaming_mode = False
@@ -346,6 +304,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
346
304
  self.assistant_message_function_name = assistant_message_function_name
347
305
  self.assistant_message_function_kwarg = assistant_message_function_kwarg
348
306
 
307
+ # Support for inner_thoughts_in_kwargs
308
+ self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs
309
+ self.inner_thoughts_kwarg = inner_thoughts_kwarg
310
+ # A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
311
+ self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=inner_thoughts_kwarg, wait_for_first_key=True)
312
+ # Two buffers used to make sure that the 'name' comes after the inner thoughts stream (if inner_thoughts_in_kwargs)
313
+ self.function_name_buffer = None
314
+ self.function_args_buffer = None
315
+
349
316
  # extra prints
350
317
  self.debug = False
351
318
  self.timeout = 30
@@ -365,16 +332,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
365
332
  # Reset the event until a new item is pushed
366
333
  self._event.clear()
367
334
 
368
- # while self._active:
369
- # # Wait until there is an item in the deque or the stream is deactivated
370
- # await self._event.wait()
371
-
372
- # while self._chunks:
373
- # yield self._chunks.popleft()
374
-
375
- # # Reset the event until a new item is pushed
376
- # self._event.clear()
377
-
378
335
  def get_generator(self) -> AsyncGenerator:
379
336
  """Get the generator that yields processed chunks."""
380
337
  if not self._active:
@@ -419,18 +376,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
419
376
  if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
420
377
  self._push_to_buffer(self.multi_step_gen_indicator)
421
378
 
422
- # self._active = False
423
- # self._event.set() # Unblock the generator if it's waiting to allow it to complete
424
-
425
- # if not self.multi_step:
426
- # # end the stream
427
- # self._active = False
428
- # self._event.set() # Unblock the generator if it's waiting to allow it to complete
429
- # else:
430
- # # signal that a new step has started in the stream
431
- # self._chunks.append(self.multi_step_indicator)
432
- # self._event.set() # Signal that new data is available
433
-
434
379
  def step_complete(self):
435
380
  """Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
436
381
  if not self.multi_step:
@@ -443,8 +388,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
443
388
 
444
389
  def step_yield(self):
445
390
  """If multi_step, this is the true 'stream_end' function."""
446
- # if self.multi_step:
447
- # end the stream
448
391
  self._active = False
449
392
  self._event.set() # Unblock the generator if it's waiting to allow it to complete
450
393
 
@@ -479,8 +422,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
479
422
  elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
480
423
  tool_call = message_delta.tool_calls[0]
481
424
 
425
+ # TODO(charles) merge into logic for internal_monologue
482
426
  # special case for trapping `send_message`
483
427
  if self.use_assistant_message and tool_call.function:
428
+ if self.inner_thoughts_in_kwargs:
429
+ raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
484
430
 
485
431
  # If we just received a chunk with the message in it, we either enter "send_message" mode, or we do standard FunctionCallMessage passthrough mode
486
432
 
@@ -538,6 +484,181 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
538
484
  ),
539
485
  )
540
486
 
487
+ elif self.inner_thoughts_in_kwargs and tool_call.function:
488
+ if self.use_assistant_message:
489
+ raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
490
+
491
+ processed_chunk = None
492
+
493
+ if tool_call.function.name:
494
+ # If we're waiting for the first key, then we should hold back the name
495
+ # ie add it to a buffer instead of returning it as a chunk
496
+ if self.function_name_buffer is None:
497
+ self.function_name_buffer = tool_call.function.name
498
+ else:
499
+ self.function_name_buffer += tool_call.function.name
500
+
501
+ if tool_call.function.arguments:
502
+ updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
503
+
504
+ # If we have inner thoughts, we should output them as a chunk
505
+ if updates_inner_thoughts:
506
+ processed_chunk = InternalMonologue(
507
+ id=message_id,
508
+ date=message_date,
509
+ internal_monologue=updates_inner_thoughts,
510
+ )
511
+ # Additionally inner thoughts may stream back with a chunk of main JSON
512
+ # In that case, since we can only return a chunk at a time, we should buffer it
513
+ if updates_main_json:
514
+ if self.function_args_buffer is None:
515
+ self.function_args_buffer = updates_main_json
516
+ else:
517
+ self.function_args_buffer += updates_main_json
518
+
519
+ # If we have main_json, we should output a FunctionCallMessage
520
+ elif updates_main_json:
521
+ # If there's something in the function_name buffer, we should release it first
522
+ # NOTE: we could output it as part of a chunk that has both name and args,
523
+ # however the frontend may expect name first, then args, so to be
524
+ # safe we'll output name first in a separate chunk
525
+ if self.function_name_buffer:
526
+ processed_chunk = FunctionCallMessage(
527
+ id=message_id,
528
+ date=message_date,
529
+ function_call=FunctionCallDelta(name=self.function_name_buffer, arguments=None),
530
+ )
531
+ # Clear the buffer
532
+ self.function_name_buffer = None
533
+ # Since we're clearing the name buffer, we should store
534
+ # any updates to the arguments inside a separate buffer
535
+ if updates_main_json:
536
+ # Add any main_json updates to the arguments buffer
537
+ if self.function_args_buffer is None:
538
+ self.function_args_buffer = updates_main_json
539
+ else:
540
+ self.function_args_buffer += updates_main_json
541
+
542
+ # If there was nothing in the name buffer, we can proceed to
543
+ # output the arguments chunk as a FunctionCallMessage
544
+ else:
545
+ # There may be a buffer from a previous chunk, for example
546
+ # if the previous chunk had arguments but we needed to flush name
547
+ if self.function_args_buffer:
548
+ # In this case, we should release the buffer + new data at once
549
+ combined_chunk = self.function_args_buffer + updates_main_json
550
+ processed_chunk = FunctionCallMessage(
551
+ id=message_id,
552
+ date=message_date,
553
+ function_call=FunctionCallDelta(name=None, arguments=combined_chunk),
554
+ )
555
+ # clear buffer
556
+ self.function_args_buffer = None
557
+ else:
558
+ # If there's no buffer to clear, just output a new chunk with new data
559
+ processed_chunk = FunctionCallMessage(
560
+ id=message_id,
561
+ date=message_date,
562
+ function_call=FunctionCallDelta(name=None, arguments=updates_main_json),
563
+ )
564
+
565
+ # # If there's something in the main_json buffer, we should add if to the arguments and release it together
566
+ # tool_call_delta = {}
567
+ # if tool_call.id:
568
+ # tool_call_delta["id"] = tool_call.id
569
+ # if tool_call.function:
570
+ # if tool_call.function.arguments:
571
+ # # tool_call_delta["arguments"] = tool_call.function.arguments
572
+ # # NOTE: using the stripped one
573
+ # tool_call_delta["arguments"] = updates_main_json
574
+ # # We use the buffered name
575
+ # if self.function_name_buffer:
576
+ # tool_call_delta["name"] = self.function_name_buffer
577
+ # # if tool_call.function.name:
578
+ # # tool_call_delta["name"] = tool_call.function.name
579
+
580
+ # processed_chunk = FunctionCallMessage(
581
+ # id=message_id,
582
+ # date=message_date,
583
+ # function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
584
+ # )
585
+
586
+ else:
587
+ processed_chunk = None
588
+
589
+ return processed_chunk
590
+
591
+ # # NOTE: this is a simplified version of the parsing code that:
592
+ # # (1) assumes that the inner_thoughts key will always come first
593
+ # # (2) assumes that there's no extra spaces in the stringified JSON
594
+ # # i.e., the prefix will look exactly like: "{\"variable\":\"}"
595
+ # if tool_call.function.arguments:
596
+ # self.function_args_buffer += tool_call.function.arguments
597
+
598
+ # # prefix_str = f'{{"\\"{self.inner_thoughts_kwarg}\\":\\"}}'
599
+ # prefix_str = f'{{"{self.inner_thoughts_kwarg}":'
600
+ # if self.function_args_buffer.startswith(prefix_str):
601
+ # print(f"Found prefix!!!: {self.function_args_buffer}")
602
+ # else:
603
+ # print(f"No prefix found: {self.function_args_buffer}")
604
+
605
+ # tool_call_delta = {}
606
+ # if tool_call.id:
607
+ # tool_call_delta["id"] = tool_call.id
608
+ # if tool_call.function:
609
+ # if tool_call.function.arguments:
610
+ # tool_call_delta["arguments"] = tool_call.function.arguments
611
+ # if tool_call.function.name:
612
+ # tool_call_delta["name"] = tool_call.function.name
613
+
614
+ # processed_chunk = FunctionCallMessage(
615
+ # id=message_id,
616
+ # date=message_date,
617
+ # function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
618
+ # )
619
+
620
+ # elif False and self.inner_thoughts_in_kwargs and tool_call.function:
621
+ # if self.use_assistant_message:
622
+ # raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
623
+
624
+ # if tool_call.function.arguments:
625
+
626
+ # Maintain a state machine to track if we're reading a key vs reading a value
627
+ # Technically we can we pre-key, post-key, pre-value, post-value
628
+
629
+ # for c in tool_call.function.arguments:
630
+ # if self.function_chunks_parsing_state == FunctionChunksParsingState.PRE_KEY:
631
+ # if c == '"':
632
+ # self.function_chunks_parsing_state = FunctionChunksParsingState.READING_KEY
633
+ # elif self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
634
+ # if c == '"':
635
+ # self.function_chunks_parsing_state = FunctionChunksParsingState.POST_KEY
636
+
637
+ # If we're reading a key:
638
+ # if self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
639
+
640
+ # We need to buffer the function arguments until we get complete keys
641
+ # We are reading stringified-JSON, so we need to check for keys in data that looks like:
642
+ # "arguments":"{\""
643
+ # "arguments":"inner"
644
+ # "arguments":"_th"
645
+ # "arguments":"ought"
646
+ # "arguments":"s"
647
+ # "arguments":"\":\""
648
+
649
+ # Once we get a complete key, check if the key matches
650
+
651
+ # If it does match, start processing the value (stringified-JSON string
652
+ # And with each new chunk, output it as a chunk of type InternalMonologue
653
+
654
+ # If the key doesn't match, then flush the buffer as a single FunctionCallMessage chunk
655
+
656
+ # If we're reading a value
657
+
658
+ # If we're reading the inner thoughts value, we output chunks of type InternalMonologue
659
+
660
+ # Otherwise, do simple chunks of FunctionCallMessage
661
+
541
662
  else:
542
663
 
543
664
  tool_call_delta = {}
@@ -563,7 +684,14 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
563
684
  # skip if there's a finish
564
685
  return None
565
686
  else:
566
- raise ValueError(f"Couldn't find delta in chunk: {chunk}")
687
+ # Example case that would trigger here:
688
+ # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
689
+ # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
690
+ # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
691
+ # model='gpt-4o-mini-2024-07-18'
692
+ # object='chat.completion.chunk'
693
+ warnings.warn(f"Couldn't find delta in chunk: {chunk}")
694
+ return None
567
695
 
568
696
  return processed_chunk
569
697
 
@@ -663,6 +791,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
663
791
  # "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
664
792
  # "id": str(msg_obj.id) if msg_obj is not None else None,
665
793
  # }
794
+ assert msg_obj is not None, "Internal monologue requires msg_obj references for metadata"
666
795
  processed_chunk = InternalMonologue(
667
796
  id=msg_obj.id,
668
797
  date=msg_obj.created_at,
@@ -676,18 +805,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
676
805
  def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
677
806
  """Letta uses send_message"""
678
807
 
679
- # if not self.streaming_mode and self.send_message_special_case:
680
-
681
- # # create a fake "chunk" of a stream
682
- # processed_chunk = {
683
- # "assistant_message": msg,
684
- # "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
685
- # "id": str(msg_obj.id) if msg_obj is not None else None,
686
- # }
687
-
688
- # self._chunks.append(processed_chunk)
689
- # self._event.set() # Signal that new data is available
690
-
808
+ # NOTE: this is a no-op, we handle this special case in function_message instead
691
809
  return
692
810
 
693
811
  def function_message(self, msg: str, msg_obj: Optional[Message] = None):
@@ -699,6 +817,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
699
817
  if msg.startswith("Running "):
700
818
  if not self.streaming_mode:
701
819
  # create a fake "chunk" of a stream
820
+ assert msg_obj.tool_calls is not None and len(msg_obj.tool_calls) > 0, "Function call required for function_message"
702
821
  function_call = msg_obj.tool_calls[0]
703
822
 
704
823
  if self.nonstreaming_legacy_mode:
@@ -784,13 +903,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
784
903
  return
785
904
  else:
786
905
  return
787
- # msg = msg.replace("Running ", "")
788
- # new_message = {"function_call": msg}
789
906
 
790
907
  elif msg.startswith("Ran "):
791
908
  return
792
- # msg = msg.replace("Ran ", "Function call returned: ")
793
- # new_message = {"function_call": msg}
794
909
 
795
910
  elif msg.startswith("Success: "):
796
911
  msg = msg.replace("Success: ", "")
@@ -821,10 +936,4 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
821
936
  raise ValueError(msg)
822
937
  new_message = {"function_message": msg}
823
938
 
824
- # add extra metadata
825
- # if msg_obj is not None:
826
- # new_message["id"] = str(msg_obj.id)
827
- # assert is_utc_datetime(msg_obj.created_at), msg_obj.created_at
828
- # new_message["date"] = msg_obj.created_at.isoformat()
829
-
830
939
  self._push_to_buffer(new_message)
@@ -430,9 +430,6 @@ async def send_message_to_agent(
430
430
  # Get the generator object off of the agent's streaming interface
431
431
  # This will be attached to the POST SSE request used under-the-hood
432
432
  letta_agent = server._get_or_load_agent(agent_id=agent_id)
433
- streaming_interface = letta_agent.interface
434
- if not isinstance(streaming_interface, StreamingServerInterface):
435
- raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
436
433
 
437
434
  # Disable token streaming if not OpenAI
438
435
  # TODO: cleanup this logic
@@ -441,6 +438,12 @@ async def send_message_to_agent(
441
438
  print("Warning: token streaming is only supported for OpenAI models. Setting to False.")
442
439
  stream_tokens = False
443
440
 
441
+ # Create a new interface per request
442
+ letta_agent.interface = StreamingServerInterface()
443
+ streaming_interface = letta_agent.interface
444
+ if not isinstance(streaming_interface, StreamingServerInterface):
445
+ raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
446
+
444
447
  # Enable token-streaming within the request if desired
445
448
  streaming_interface.streaming_mode = stream_tokens
446
449
  # "chatcompletion mode" does some remapping and ignores inner thoughts
@@ -454,6 +457,11 @@ async def send_message_to_agent(
454
457
  streaming_interface.assistant_message_function_name = assistant_message_function_name
455
458
  streaming_interface.assistant_message_function_kwarg = assistant_message_function_kwarg
456
459
 
460
+ # Related to JSON buffer reader
461
+ streaming_interface.inner_thoughts_in_kwargs = (
462
+ llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
463
+ )
464
+
457
465
  # Offload the synchronous message_func to a separate thread
458
466
  streaming_interface.stream_start()
459
467
  task = asyncio.create_task(
@@ -0,0 +1,270 @@
1
+ from typing import Optional
2
+
3
+ from letta.constants import DEFAULT_MESSAGE_TOOL_KWARG
4
+
5
+
6
+ class JSONInnerThoughtsExtractor:
7
+ """
8
+ A class to process incoming JSON fragments and extract 'inner_thoughts' separately from the main JSON.
9
+
10
+ This handler processes JSON fragments incrementally, parsing out the value associated with a specified key (default is 'inner_thoughts'). It maintains two separate buffers:
11
+
12
+ - `main_json`: Accumulates the JSON data excluding the 'inner_thoughts' key-value pair.
13
+ - `inner_thoughts`: Accumulates the value associated with the 'inner_thoughts' key.
14
+
15
+ **Parameters:**
16
+
17
+ - `inner_thoughts_key` (str): The key to extract from the JSON (default is 'inner_thoughts').
18
+ - `wait_for_first_key` (bool): If `True`, holds back main JSON output until after the 'inner_thoughts' value is processed.
19
+
20
+ **Functionality:**
21
+
22
+ - **Stateful Parsing:** Maintains parsing state across fragments.
23
+ - **String Handling:** Correctly processes strings, escape sequences, and quotation marks.
24
+ - **Selective Extraction:** Identifies and extracts the value of the specified key.
25
+ - **Fragment Processing:** Handles data that arrives in chunks.
26
+
27
+ **Usage:**
28
+
29
+ ```python
30
+ extractor = JSONInnerThoughtsExtractor(wait_for_first_key=True)
31
+ for fragment in fragments:
32
+ updates_main_json, updates_inner_thoughts = extractor.process_fragment(fragment)
33
+ ```
34
+
35
+ """
36
+
37
+ def __init__(self, inner_thoughts_key="inner_thoughts", wait_for_first_key=False):
38
+ self.inner_thoughts_key = inner_thoughts_key
39
+ self.wait_for_first_key = wait_for_first_key
40
+ self.main_buffer = ""
41
+ self.inner_thoughts_buffer = ""
42
+ self.state = "start" # Possible states: start, key, colon, value, comma_or_end, end
43
+ self.in_string = False
44
+ self.escaped = False
45
+ self.current_key = ""
46
+ self.is_inner_thoughts_value = False
47
+ self.inner_thoughts_processed = False
48
+ self.hold_main_json = wait_for_first_key
49
+ self.main_json_held_buffer = ""
50
+
51
+ def process_fragment(self, fragment):
52
+ updates_main_json = ""
53
+ updates_inner_thoughts = ""
54
+ i = 0
55
+ while i < len(fragment):
56
+ c = fragment[i]
57
+ if self.escaped:
58
+ self.escaped = False
59
+ if self.in_string:
60
+ if self.state == "key":
61
+ self.current_key += c
62
+ elif self.state == "value":
63
+ if self.is_inner_thoughts_value:
64
+ updates_inner_thoughts += c
65
+ self.inner_thoughts_buffer += c
66
+ else:
67
+ if self.hold_main_json:
68
+ self.main_json_held_buffer += c
69
+ else:
70
+ updates_main_json += c
71
+ self.main_buffer += c
72
+ else:
73
+ if not self.is_inner_thoughts_value:
74
+ if self.hold_main_json:
75
+ self.main_json_held_buffer += c
76
+ else:
77
+ updates_main_json += c
78
+ self.main_buffer += c
79
+ elif c == "\\":
80
+ self.escaped = True
81
+ if self.in_string:
82
+ if self.state == "key":
83
+ self.current_key += c
84
+ elif self.state == "value":
85
+ if self.is_inner_thoughts_value:
86
+ updates_inner_thoughts += c
87
+ self.inner_thoughts_buffer += c
88
+ else:
89
+ if self.hold_main_json:
90
+ self.main_json_held_buffer += c
91
+ else:
92
+ updates_main_json += c
93
+ self.main_buffer += c
94
+ else:
95
+ if not self.is_inner_thoughts_value:
96
+ if self.hold_main_json:
97
+ self.main_json_held_buffer += c
98
+ else:
99
+ updates_main_json += c
100
+ self.main_buffer += c
101
+ elif c == '"':
102
+ if not self.escaped:
103
+ self.in_string = not self.in_string
104
+ if self.in_string:
105
+ if self.state in ["start", "comma_or_end"]:
106
+ self.state = "key"
107
+ self.current_key = ""
108
+ # Release held main_json when starting to process the next key
109
+ if self.wait_for_first_key and self.hold_main_json and self.inner_thoughts_processed:
110
+ updates_main_json += self.main_json_held_buffer
111
+ self.main_buffer += self.main_json_held_buffer
112
+ self.main_json_held_buffer = ""
113
+ self.hold_main_json = False
114
+ else:
115
+ if self.state == "key":
116
+ self.state = "colon"
117
+ elif self.state == "value":
118
+ # End of value
119
+ if self.is_inner_thoughts_value:
120
+ self.inner_thoughts_processed = True
121
+ # Do not release held main_json here
122
+ else:
123
+ if self.hold_main_json:
124
+ self.main_json_held_buffer += '"'
125
+ else:
126
+ updates_main_json += '"'
127
+ self.main_buffer += '"'
128
+ self.state = "comma_or_end"
129
+ else:
130
+ self.escaped = False
131
+ if self.in_string:
132
+ if self.state == "key":
133
+ self.current_key += '"'
134
+ elif self.state == "value":
135
+ if self.is_inner_thoughts_value:
136
+ updates_inner_thoughts += '"'
137
+ self.inner_thoughts_buffer += '"'
138
+ else:
139
+ if self.hold_main_json:
140
+ self.main_json_held_buffer += '"'
141
+ else:
142
+ updates_main_json += '"'
143
+ self.main_buffer += '"'
144
+ elif self.in_string:
145
+ if self.state == "key":
146
+ self.current_key += c
147
+ elif self.state == "value":
148
+ if self.is_inner_thoughts_value:
149
+ updates_inner_thoughts += c
150
+ self.inner_thoughts_buffer += c
151
+ else:
152
+ if self.hold_main_json:
153
+ self.main_json_held_buffer += c
154
+ else:
155
+ updates_main_json += c
156
+ self.main_buffer += c
157
+ else:
158
+ if c == ":" and self.state == "colon":
159
+ self.state = "value"
160
+ self.is_inner_thoughts_value = self.current_key == self.inner_thoughts_key
161
+ if self.is_inner_thoughts_value:
162
+ pass # Do not include 'inner_thoughts' key in main_json
163
+ else:
164
+ key_colon = f'"{self.current_key}":'
165
+ if self.hold_main_json:
166
+ self.main_json_held_buffer += key_colon + '"'
167
+ else:
168
+ updates_main_json += key_colon + '"'
169
+ self.main_buffer += key_colon + '"'
170
+ elif c == "," and self.state == "comma_or_end":
171
+ if self.is_inner_thoughts_value:
172
+ # Inner thoughts value ended
173
+ self.is_inner_thoughts_value = False
174
+ self.state = "start"
175
+ # Do not release held main_json here
176
+ else:
177
+ if self.hold_main_json:
178
+ self.main_json_held_buffer += c
179
+ else:
180
+ updates_main_json += c
181
+ self.main_buffer += c
182
+ self.state = "start"
183
+ elif c == "{":
184
+ if not self.is_inner_thoughts_value:
185
+ if self.hold_main_json:
186
+ self.main_json_held_buffer += c
187
+ else:
188
+ updates_main_json += c
189
+ self.main_buffer += c
190
+ elif c == "}":
191
+ self.state = "end"
192
+ if self.hold_main_json:
193
+ self.main_json_held_buffer += c
194
+ else:
195
+ updates_main_json += c
196
+ self.main_buffer += c
197
+ else:
198
+ if self.state == "value":
199
+ if self.is_inner_thoughts_value:
200
+ updates_inner_thoughts += c
201
+ self.inner_thoughts_buffer += c
202
+ else:
203
+ if self.hold_main_json:
204
+ self.main_json_held_buffer += c
205
+ else:
206
+ updates_main_json += c
207
+ self.main_buffer += c
208
+ i += 1
209
+
210
+ return updates_main_json, updates_inner_thoughts
211
+
212
+ @property
213
+ def main_json(self):
214
+ return self.main_buffer
215
+
216
+ @property
217
+ def inner_thoughts(self):
218
+ return self.inner_thoughts_buffer
219
+
220
+
221
+ class FunctionArgumentsStreamHandler:
222
+ """State machine that can process a stream of"""
223
+
224
+ def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
225
+ self.json_key = json_key
226
+ self.reset()
227
+
228
+ def reset(self):
229
+ self.in_message = False
230
+ self.key_buffer = ""
231
+ self.accumulating = False
232
+ self.message_started = False
233
+
234
+ def process_json_chunk(self, chunk: str) -> Optional[str]:
235
+ """Process a chunk from the function arguments and return the plaintext version"""
236
+
237
+ # Use strip to handle only leading and trailing whitespace in control structures
238
+ if self.accumulating:
239
+ clean_chunk = chunk.strip()
240
+ if self.json_key in self.key_buffer:
241
+ if ":" in clean_chunk:
242
+ self.in_message = True
243
+ self.accumulating = False
244
+ return None
245
+ self.key_buffer += clean_chunk
246
+ return None
247
+
248
+ if self.in_message:
249
+ if chunk.strip() == '"' and self.message_started:
250
+ self.in_message = False
251
+ self.message_started = False
252
+ return None
253
+ if not self.message_started and chunk.strip() == '"':
254
+ self.message_started = True
255
+ return None
256
+ if self.message_started:
257
+ if chunk.strip().endswith('"'):
258
+ self.in_message = False
259
+ return chunk.rstrip('"\n')
260
+ return chunk
261
+
262
+ if chunk.strip() == "{":
263
+ self.key_buffer = ""
264
+ self.accumulating = True
265
+ return None
266
+ if chunk.strip() == "}":
267
+ self.in_message = False
268
+ self.message_started = False
269
+ return None
270
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: letta-nightly
3
- Version: 0.5.0.dev20241021104213
3
+ Version: 0.5.0.dev20241022104124
4
4
  Summary: Create LLM agents with long-term memory and custom tools
5
5
  License: Apache License
6
6
  Author: Letta Team
@@ -41,10 +41,10 @@ letta/llm_api/azure_openai.py,sha256=C-fuuholudcLJDWjqnXJwpXsfmGWfNugEVWyj6YCrpg
41
41
  letta/llm_api/azure_openai_constants.py,sha256=oXtKrgBFHf744gyt5l1thILXgyi8NDNUrKEa2GGGpjw,278
42
42
  letta/llm_api/cohere.py,sha256=vDRd-SUGp1t_JUIdwC3RkIhwMl0OY7n-tAU9uPORYkY,14826
43
43
  letta/llm_api/google_ai.py,sha256=3xZ074nSOCC22c15yerA5ngWzh0ex4wxeI-6faNbHPE,17708
44
- letta/llm_api/helpers.py,sha256=8aG6LzB0T3NFlnab-RR2tj0ARUTMBHSd0icCur5-RCk,8813
44
+ letta/llm_api/helpers.py,sha256=sGCmNA1U_7-AhRFgvT668jdp_xyzSliKQYbTvRR6O7c,9812
45
45
  letta/llm_api/llm_api_tools.py,sha256=GEBO7Dlt7xtAQud1sVsigKZKPpLOZOt2IWL8LwcNV4o,14869
46
46
  letta/llm_api/mistral.py,sha256=fHdfD9ug-rQIk2qn8tRKay1U6w9maF11ryhKi91FfXM,1593
47
- letta/llm_api/openai.py,sha256=faJLzgx94cxz32VSeSxLDHeeSiKkb5WCM3BA_MNylkI,22821
47
+ letta/llm_api/openai.py,sha256=_kztTpd7IR12bRjvMT1n8dvy5bINvgJ3od65HBQunVk,23212
48
48
  letta/local_llm/README.md,sha256=hFJyw5B0TU2jrh9nb0zGZMgdH-Ei1dSRfhvPQG_NSoU,168
49
49
  letta/local_llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
50
  letta/local_llm/chat_completion_proxy.py,sha256=SiohxsjGTku4vOryOZx7I0t0xoO_sUuhXgoe62fKq3c,12995
@@ -151,7 +151,7 @@ letta/server/rest_api/app.py,sha256=JNmDnvp9fP--hJPtPpEWgQT-14O1YOceZbWELr2vedA,
151
151
  letta/server/rest_api/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
152
152
  letta/server/rest_api/auth/index.py,sha256=fQBGyVylGSRfEMLQ17cZzrHd5Y1xiVylvPqH5Rl-lXQ,1378
153
153
  letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
154
- letta/server/rest_api/interface.py,sha256=Mub9iOQFJh9HSwbc5X6OwHCdtwJYCBzhOjpSx9c5Lss,36181
154
+ letta/server/rest_api/interface.py,sha256=Km1nJSN8Ogyj5P-DmDRyR5fEov-G0o9KEIXAlsqqb5A,44525
155
155
  letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
156
  letta/server/rest_api/routers/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
157
157
  letta/server/rest_api/routers/openai/assistants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -161,7 +161,7 @@ letta/server/rest_api/routers/openai/assistants/threads.py,sha256=WXVGBaBvSNPB7Z
161
161
  letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
162
  letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256=-uye6cm4SnoQGwxhr1N1FrSXOlnO2Hvbfj6k8JSc45k,4918
163
163
  letta/server/rest_api/routers/v1/__init__.py,sha256=sqlVZa-u9DJwdRsp0_8YUGrac9DHguIB4wETlEDRylA,666
164
- letta/server/rest_api/routers/v1/agents.py,sha256=Yoktva6_pSCRztUdZNZXdbnrp9L5OKnP5E1mZkbUAGw,25066
164
+ letta/server/rest_api/routers/v1/agents.py,sha256=BY4rQOcwsi_WiWh6DwrO8Vz6Nu2JgMBKSHxiBqlaaYY,25397
165
165
  letta/server/rest_api/routers/v1/blocks.py,sha256=0WekE_yBD2U3jYgPxI0DCFjACWavCAlvm_Ybw5SZBnw,2583
166
166
  letta/server/rest_api/routers/v1/health.py,sha256=pKCuVESlVOhGIb4VC4K-H82eZqfghmT6kvj2iOkkKuc,401
167
167
  letta/server/rest_api/routers/v1/jobs.py,sha256=a-j0v-5A0un0pVCOHpfeWnzpOWkVDQO6ti42k_qAlZY,2272
@@ -187,10 +187,11 @@ letta/server/ws_api/protocol.py,sha256=M_-gM5iuDBwa1cuN2IGNCG5GxMJwU2d3XW93XALv9
187
187
  letta/server/ws_api/server.py,sha256=C2Kv48PCwl46DQFb0ZP30s86KJLQ6dZk2AhWQEZn9pY,6004
188
188
  letta/settings.py,sha256=gNdH-Ty6f-Nfz2j9ZMZFRQHac2KzgsxLZNt5l_TiAyo,3301
189
189
  letta/streaming_interface.py,sha256=_FPUWy58j50evHcpXyd7zB1wWqeCc71NCFeWh_TBvnw,15736
190
+ letta/streaming_utils.py,sha256=329fsvj1ZN0r0LpQtmMPZ2vSxkDBIUUwvGHZFkjm2I8,11745
190
191
  letta/system.py,sha256=buKYPqG5n2x41hVmWpu6JUpyd7vTWED9Km2_M7dLrvk,6960
191
192
  letta/utils.py,sha256=SXLEYhyp3gHyIjrxNIKNZZ5ittKo3KOj6zxgC_Trex0,31012
192
- letta_nightly-0.5.0.dev20241021104213.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
193
- letta_nightly-0.5.0.dev20241021104213.dist-info/METADATA,sha256=HzdXSUkW_tN8xukhNa0g1wcCtIoZAlc9S4vaAHbmdDs,10620
194
- letta_nightly-0.5.0.dev20241021104213.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
195
- letta_nightly-0.5.0.dev20241021104213.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
196
- letta_nightly-0.5.0.dev20241021104213.dist-info/RECORD,,
193
+ letta_nightly-0.5.0.dev20241022104124.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
194
+ letta_nightly-0.5.0.dev20241022104124.dist-info/METADATA,sha256=v5hk_4eSmZRsN51JSel2I1mXzkM4XCHiAJrBF7fPC-Y,10620
195
+ letta_nightly-0.5.0.dev20241022104124.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
196
+ letta_nightly-0.5.0.dev20241022104124.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
197
+ letta_nightly-0.5.0.dev20241022104124.dist-info/RECORD,,