letta-nightly 0.11.7.dev20250913103940__py3-none-any.whl → 0.11.7.dev20250914103918__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- letta/interfaces/openai_streaming_interface.py +14 -5
- letta/server/rest_api/interface.py +22 -75
- letta/streaming_utils.py +79 -18
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/RECORD +8 -8
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/licenses/LICENSE +0 -0
letta/interfaces/openai_streaming_interface.py CHANGED

@@ -24,7 +24,11 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
 from letta.server.rest_api.json_parser import OptimisticJSONParser
-from letta.streaming_utils import
+from letta.streaming_utils import (
+    FunctionArgumentsStreamHandler,
+    JSONInnerThoughtsExtractor,
+    sanitize_streamed_message_content,
+)
 from letta.utils import count_tokens

 logger = get_logger(__name__)

@@ -278,8 +282,6 @@ class OpenAIStreamingInterface:
                     self.prev_assistant_message_id = self.function_id_buffer
                     # Reset message reader at the start of a new send_message stream
                     self.assistant_message_json_reader.reset()
-                    self.assistant_message_json_reader.in_message = True
-                    self.assistant_message_json_reader.message_started = True

                 else:
                     if prev_message_type and prev_message_type != "tool_call_message":

@@ -334,8 +336,15 @@
                     self.last_flushed_function_name is not None
                     and self.last_flushed_function_name == self.assistant_message_tool_name
                 ):
-                    # Minimal, robust extraction: only emit the value of "message"
-
+                    # Minimal, robust extraction: only emit the value of "message".
+                    # If we buffered a prefix while name was streaming, feed it first.
+                    if self.function_args_buffer:
+                        payload = self.function_args_buffer + tool_call.function.arguments
+                        self.function_args_buffer = None
+                    else:
+                        payload = tool_call.function.arguments
+                    extracted = self.assistant_message_json_reader.process_json_chunk(payload)
+                    extracted = sanitize_streamed_message_content(extracted or "")
                     if extracted:
                         if prev_message_type and prev_message_type != "assistant_message":
                             message_index += 1
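The new extraction path above routes every emitted fragment through sanitize_streamed_message_content, which is added in the letta/streaming_utils.py diff further down. A minimal sketch of its effect on trailing JSON delimiters, using hypothetical stream fragments (assumes the new wheel is installed):

    from letta.streaming_utils import sanitize_streamed_message_content

    # Trailing quote-plus-delimiter pairs that leak out of the tool-call JSON are stripped.
    assert sanitize_streamed_message_content('Hello there"}') == "Hello there"
    assert sanitize_streamed_message_content('Hello there",') == "Hello there"
    # A lone trailing quote is stripped too.
    assert sanitize_streamed_message_content('Hello there"') == "Hello there"
    # Internal punctuation is preserved; only trailing delimiters are removed.
    assert sanitize_streamed_message_content("Hello, there") == "Hello, there"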
letta/server/rest_api/interface.py CHANGED

@@ -808,86 +808,33 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 # If there was nothing in the name buffer, we can proceed to
                 # output the arguments chunk as a ToolCallMessage
                 else:
-                    #
+                    # use_assistant_message means we should emit only the value of "message"
                     if self.use_assistant_message and (
                         self.last_flushed_function_name is not None
                         and self.last_flushed_function_name == self.assistant_message_tool_name
                     ):
-                        #
-
-
-
-
-
-
-
-                            updates_main_json = None
-
-                        else:
-                            # Some hardcoding to strip off the trailing "}"
-                            if updates_main_json in ["}", '"}']:
-                                updates_main_json = None
-                            if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
-                                updates_main_json = updates_main_json[:-1]
-
-                        if not updates_main_json:
-                            # early exit to turn into content mode
+                        # Feed any buffered prefix first to avoid missing the start of the value
+                        payload = (self.function_args_buffer or "") + (updates_main_json or "")
+                        self.function_args_buffer = None
+                        cleaned = self.streaming_chat_completion_json_reader.process_json_chunk(payload)
+                        from letta.streaming_utils import sanitize_streamed_message_content
+
+                        cleaned = sanitize_streamed_message_content(cleaned or "")
+                        if not cleaned:
                             return None
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            name=name,
-                            otid=Message.generate_otid_from_id(message_id, message_index),
-                        )
-                        # Store the ID of the tool call so allow skipping the corresponding response
-                        if self.function_id_buffer:
-                            self.prev_assistant_message_id = self.function_id_buffer
-                        # clear buffer
-                        self.function_args_buffer = None
-                        self.function_id_buffer = None
-
-                        else:
-                            # If there's no buffer to clear, just output a new chunk with new data
-                            # TODO: THIS IS HORRIBLE
-                            # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
-                            # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                            parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
-
-                            if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
-                                self.assistant_message_tool_kwarg
-                            ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
-                                new_content = parsed_args.get(self.assistant_message_tool_kwarg)
-                                prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
-                                # TODO: Assumes consistent state and that prev_content is subset of new_content
-                                diff = new_content.replace(prev_content, "", 1)
-                                self.current_json_parse_result = parsed_args
-                                if prev_message_type and prev_message_type != "assistant_message":
-                                    message_index += 1
-                                processed_chunk = AssistantMessage(
-                                    id=message_id,
-                                    date=message_date,
-                                    content=diff,
-                                    name=name,
-                                    otid=Message.generate_otid_from_id(message_id, message_index),
-                                )
-                            else:
-                                return None
-
-                        # Store the ID of the tool call so allow skipping the corresponding response
-                        if self.function_id_buffer:
-                            self.prev_assistant_message_id = self.function_id_buffer
-                        # clear buffers
-                        self.function_id_buffer = None
+                        if prev_message_type and prev_message_type != "assistant_message":
+                            message_index += 1
+                        processed_chunk = AssistantMessage(
+                            id=message_id,
+                            date=message_date,
+                            content=cleaned,
+                            name=name,
+                            otid=Message.generate_otid_from_id(message_id, message_index),
+                        )
+                        # Store the ID of the tool call so allow skipping the corresponding response
+                        if self.function_id_buffer:
+                            self.prev_assistant_message_id = self.function_id_buffer
+                        # Do not clear function_id_buffer here — we may still need it
                     else:
                         # There may be a buffer from a previous chunk, for example
                         # if the previous chunk had arguments but we needed to flush name
letta/streaming_utils.py CHANGED

@@ -264,39 +264,100 @@ class FunctionArgumentsStreamHandler:

     def process_json_chunk(self, chunk: str) -> Optional[str]:
         """Process a chunk from the function arguments and return the plaintext version"""
-
-
-
-        if
-
-
-
-            return None
+        clean_chunk = chunk.strip()
+        # Not in message yet: accumulate until we see '<json_key>': (robust to split fragments)
+        if not self.in_message:
+            if clean_chunk == "{":
+                self.key_buffer = ""
+                self.accumulating = True
+                return None
             self.key_buffer += clean_chunk
+            if self.json_key in self.key_buffer and ":" in clean_chunk:
+                # Enter value mode; attempt to extract inline content if it exists in this same chunk
+                self.in_message = True
+                self.accumulating = False
+                # Try to find the first quote after the colon within the original (unstripped) chunk
+                s = chunk
+                colon_idx = s.find(":")
+                if colon_idx != -1:
+                    q_idx = s.find('"', colon_idx + 1)
+                    if q_idx != -1:
+                        self.message_started = True
+                        rem = s[q_idx + 1 :]
+                        # Check if this same chunk also contains the terminating quote (and optional delimiter)
+                        j = len(rem) - 1
+                        while j >= 0 and rem[j] in " \t\r\n":
+                            j -= 1
+                        if j >= 1 and rem[j - 1] == '"' and rem[j] in ",}]":
+                            out = rem[: j - 1]
+                            self.in_message = False
+                            self.message_started = False
+                            return out
+                        if j >= 0 and rem[j] == '"':
+                            out = rem[:j]
+                            self.in_message = False
+                            self.message_started = False
+                            return out
+                        # No terminator yet; emit remainder as content
+                        return rem
+                return None
+            if clean_chunk == "}":
+                self.in_message = False
+                self.message_started = False
+                self.key_buffer = ""
             return None

+        # Inside message value
         if self.in_message:
-
+            # Bare opening/closing quote tokens
+            if clean_chunk == '"' and self.message_started:
                 self.in_message = False
                 self.message_started = False
                 return None
-            if not self.message_started and
+            if not self.message_started and clean_chunk == '"':
                 self.message_started = True
                 return None
             if self.message_started:
-
+                # Detect closing patterns: '"', '",', '"}' (with optional whitespace)
+                i = len(chunk) - 1
+                while i >= 0 and chunk[i] in " \t\r\n":
+                    i -= 1
+                if i >= 1 and chunk[i - 1] == '"' and chunk[i] in ",}]":
+                    out = chunk[: i - 1]
                     self.in_message = False
-
+                    self.message_started = False
+                    return out
+                if i >= 0 and chunk[i] == '"':
+                    out = chunk[:i]
+                    self.in_message = False
+                    self.message_started = False
+                    return out
+                # Otherwise, still mid-string
                 return chunk

-        if
-            self.key_buffer = ""
-            self.accumulating = True
-            return None
-
-        if chunk.strip() == "}":
+        if clean_chunk == "}":
             self.in_message = False
             self.message_started = False
+            self.key_buffer = ""
             return None

         return None
+
+
+def sanitize_streamed_message_content(text: str) -> str:
+    """Remove trailing JSON delimiters that can leak into assistant text.
+
+    Specifically handles cases where a message string is immediately followed
+    by a JSON delimiter in the stream (e.g., '"', '",', '"}', '" ]').
+    Internal commas inside the message are preserved.
+    """
+    if not text:
+        return text
+    t = text.rstrip()
+    # strip trailing quote + delimiter
+    if len(t) >= 2 and t[-2] == '"' and t[-1] in ",}]":
+        return t[:-2]
+    # strip lone trailing quote
+    if t.endswith('"'):
+        return t[:-1]
+    return t
{letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/RECORD CHANGED

@@ -11,7 +11,7 @@ letta/memory.py,sha256=l5iNhLAR_xzgTb0GBlQx4SVgH8kuZh8siJdC_CFPKEs,4278
 letta/pytest.ini,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/settings.py,sha256=QEjNUwRXGBgsQpQAs2kksQmGN5CbxKlxPPydrklx_Ms,15011
 letta/streaming_interface.py,sha256=rPMfwUcjqITWk2tVqFQm1hmP99tU2IOHg9gU2dgPSo8,16400
-letta/streaming_utils.py,sha256=
+letta/streaming_utils.py,sha256=ZRFGFpQqn9ujCEbgZdLM7yTjiuNNvqQ47sNhV8ix-yQ,16553
 letta/system.py,sha256=kHF7n3Viq7gV5UIUEXixod2gWa2jroUgztpEzMC1Sew,8925
 letta/utils.py,sha256=bSq3St7MUw9gN1g0ICdOhNNaUFYBC3EfJLG6qsRLSFA,43290
 letta/adapters/letta_llm_adapter.py,sha256=11wkOkEQfPXUuJoJxbK22wCa-8gnWiDAb3UOXOxLt5U,3427

@@ -85,7 +85,7 @@ letta/humans/examples/cs_phd.txt,sha256=9C9ZAV_VuG7GB31ksy3-_NAyk8rjE6YtVOkhp08k
 letta/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/interfaces/anthropic_streaming_interface.py,sha256=0VyK8kTRgCLNDLQN6vX1gJ0dfJhqguL_NL1GYgFr6fU,25614
 letta/interfaces/openai_chat_completions_streaming_interface.py,sha256=3xHXh8cW79EkiMUTYfvcH_s92nkLjxXfvtVOVC3bfLo,5050
-letta/interfaces/openai_streaming_interface.py,sha256=
+letta/interfaces/openai_streaming_interface.py,sha256=YLArar2ypOEaVt7suJxpg1QZr0ErwEmPSEVhzaP6JWc,24166
 letta/interfaces/utils.py,sha256=c6jvO0dBYHh8DQnlN-B0qeNC64d3CSunhfqlFA4pJTY,278
 letta/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/jobs/helpers.py,sha256=kO4aj954xsQ1RAmkjY6LQQ7JEIGuhaxB1e9pzrYKHAY,914

@@ -340,7 +340,7 @@ letta/server/rest_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
 letta/server/rest_api/app.py,sha256=T3LLveXRJmfWqR0uEzoaLY8LXwYrwCQGb80XMbSCDUo,21172
 letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
 letta/server/rest_api/chat_completions_interface.py,sha256=-7wO7pNBWXMqblVkJpuZ8JPJ-LjudLTtT6BJu-q_XAM,11138
-letta/server/rest_api/interface.py,sha256=
+letta/server/rest_api/interface.py,sha256=_GQfKYUp9w4Wo2HSE_8Ff7QU16t1blspLaqmukpER9s,67099
 letta/server/rest_api/json_parser.py,sha256=yoakaCkSMdf0Y_pyILoFKZlvzXeqF-E1KNeHzatLMDc,9157
 letta/server/rest_api/redis_stream_manager.py,sha256=hz85CigFWdLkK1FWUmF-i6ObgoKkuoEgkiwshZ6QPKI,10764
 letta/server/rest_api/static_files.py,sha256=NG8sN4Z5EJ8JVQdj19tkFa9iQ1kBPTab9f_CUxd_u4Q,3143

@@ -470,8 +470,8 @@ letta/templates/sandbox_code_file_async.py.j2,sha256=lb7nh_P2W9VZHzU_9TxSCEMUod7
 letta/templates/summary_request_text.j2,sha256=ZttQwXonW2lk4pJLYzLK0pmo4EO4EtUUIXjgXKiizuc,842
 letta/templates/template_helper.py,sha256=HkG3zwRc5NVGmSTQu5PUTpz7LevK43bzXVaQuN8urf0,1634
 letta/types/__init__.py,sha256=hokKjCVFGEfR7SLMrtZsRsBfsC7yTIbgKPLdGg4K1eY,147
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
+letta_nightly-0.11.7.dev20250914103918.dist-info/METADATA,sha256=znAgbibaDvvLthC_McJ-W-HokPJdRIUijKN7KtgqoE0,24424
+letta_nightly-0.11.7.dev20250914103918.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+letta_nightly-0.11.7.dev20250914103918.dist-info/entry_points.txt,sha256=m-94Paj-kxiR6Ktu0us0_2qfhn29DzF2oVzqBE6cu8w,41
+letta_nightly-0.11.7.dev20250914103918.dist-info/licenses/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.11.7.dev20250914103918.dist-info/RECORD,,