letta-nightly 0.11.7.dev20250913103940__py3-none-any.whl → 0.11.7.dev20250914103918__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- letta/interfaces/openai_streaming_interface.py +14 -5
- letta/server/rest_api/interface.py +22 -75
- letta/streaming_utils.py +79 -18
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/RECORD +8 -8
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/licenses/LICENSE +0 -0
letta/interfaces/openai_streaming_interface.py CHANGED

@@ -24,7 +24,11 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
 from letta.server.rest_api.json_parser import OptimisticJSONParser
-from letta.streaming_utils import
+from letta.streaming_utils import (
+    FunctionArgumentsStreamHandler,
+    JSONInnerThoughtsExtractor,
+    sanitize_streamed_message_content,
+)
 from letta.utils import count_tokens

 logger = get_logger(__name__)

@@ -278,8 +282,6 @@ class OpenAIStreamingInterface:
                     self.prev_assistant_message_id = self.function_id_buffer
                     # Reset message reader at the start of a new send_message stream
                     self.assistant_message_json_reader.reset()
-                    self.assistant_message_json_reader.in_message = True
-                    self.assistant_message_json_reader.message_started = True

                 else:
                     if prev_message_type and prev_message_type != "tool_call_message":

@@ -334,8 +336,15 @@
                     self.last_flushed_function_name is not None
                     and self.last_flushed_function_name == self.assistant_message_tool_name
                 ):
-                    # Minimal, robust extraction: only emit the value of "message"
-
+                    # Minimal, robust extraction: only emit the value of "message".
+                    # If we buffered a prefix while name was streaming, feed it first.
+                    if self.function_args_buffer:
+                        payload = self.function_args_buffer + tool_call.function.arguments
+                        self.function_args_buffer = None
+                    else:
+                        payload = tool_call.function.arguments
+                    extracted = self.assistant_message_json_reader.process_json_chunk(payload)
+                    extracted = sanitize_streamed_message_content(extracted or "")
                     if extracted:
                         if prev_message_type and prev_message_type != "assistant_message":
                             message_index += 1
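The new extraction path above routes every emitted fragment through sanitize_streamed_message_content, which is added in the letta/streaming_utils.py diff further down. A minimal sketch of its effect on trailing JSON delimiters, using hypothetical stream fragments (assumes the new wheel is installed):

    from letta.streaming_utils import sanitize_streamed_message_content

    # Trailing quote-plus-delimiter pairs that leak out of the tool-call JSON are stripped.
    assert sanitize_streamed_message_content('Hello there"}') == "Hello there"
    assert sanitize_streamed_message_content('Hello there",') == "Hello there"
    # A lone trailing quote is stripped too.
    assert sanitize_streamed_message_content('Hello there"') == "Hello there"
    # Internal punctuation is preserved; only trailing delimiters are removed.
    assert sanitize_streamed_message_content("Hello, there") == "Hello, there"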
letta/server/rest_api/interface.py CHANGED

@@ -808,86 +808,33 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 # If there was nothing in the name buffer, we can proceed to
                 # output the arguments chunk as a ToolCallMessage
                 else:
-                    #
+                    # use_assistant_message means we should emit only the value of "message"
                     if self.use_assistant_message and (
                         self.last_flushed_function_name is not None
                         and self.last_flushed_function_name == self.assistant_message_tool_name
                     ):
-                        #
-
-
-
-
-
-
-
-                            updates_main_json = None
-
-                        else:
-                            # Some hardcoding to strip off the trailing "}"
-                            if updates_main_json in ["}", '"}']:
-                                updates_main_json = None
-                            if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
-                                updates_main_json = updates_main_json[:-1]
-
-                        if not updates_main_json:
-                            # early exit to turn into content mode
+                        # Feed any buffered prefix first to avoid missing the start of the value
+                        payload = (self.function_args_buffer or "") + (updates_main_json or "")
+                        self.function_args_buffer = None
+                        cleaned = self.streaming_chat_completion_json_reader.process_json_chunk(payload)
+                        from letta.streaming_utils import sanitize_streamed_message_content
+
+                        cleaned = sanitize_streamed_message_content(cleaned or "")
+                        if not cleaned:
                             return None
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            name=name,
-                            otid=Message.generate_otid_from_id(message_id, message_index),
-                        )
-                        # Store the ID of the tool call so allow skipping the corresponding response
-                        if self.function_id_buffer:
-                            self.prev_assistant_message_id = self.function_id_buffer
-                        # clear buffer
-                        self.function_args_buffer = None
-                        self.function_id_buffer = None
-
-                        else:
-                            # If there's no buffer to clear, just output a new chunk with new data
-                            # TODO: THIS IS HORRIBLE
-                            # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
-                            # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                            parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
-
-                            if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
-                                self.assistant_message_tool_kwarg
-                            ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
-                                new_content = parsed_args.get(self.assistant_message_tool_kwarg)
-                                prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
-                                # TODO: Assumes consistent state and that prev_content is subset of new_content
-                                diff = new_content.replace(prev_content, "", 1)
-                                self.current_json_parse_result = parsed_args
-                                if prev_message_type and prev_message_type != "assistant_message":
-                                    message_index += 1
-                                processed_chunk = AssistantMessage(
-                                    id=message_id,
-                                    date=message_date,
-                                    content=diff,
-                                    name=name,
-                                    otid=Message.generate_otid_from_id(message_id, message_index),
-                                )
-                            else:
-                                return None
-
-                        # Store the ID of the tool call so allow skipping the corresponding response
-                        if self.function_id_buffer:
-                            self.prev_assistant_message_id = self.function_id_buffer
-                        # clear buffers
-                        self.function_id_buffer = None
+                        if prev_message_type and prev_message_type != "assistant_message":
+                            message_index += 1
+                        processed_chunk = AssistantMessage(
+                            id=message_id,
+                            date=message_date,
+                            content=cleaned,
+                            name=name,
+                            otid=Message.generate_otid_from_id(message_id, message_index),
+                        )
+                        # Store the ID of the tool call so allow skipping the corresponding response
+                        if self.function_id_buffer:
+                            self.prev_assistant_message_id = self.function_id_buffer
+                        # Do not clear function_id_buffer here — we may still need it
                     else:
                         # There may be a buffer from a previous chunk, for example
                         # if the previous chunk had arguments but we needed to flush name
letta/streaming_utils.py CHANGED

@@ -264,39 +264,100 @@ class FunctionArgumentsStreamHandler:

     def process_json_chunk(self, chunk: str) -> Optional[str]:
         """Process a chunk from the function arguments and return the plaintext version"""
-
-
-
-        if
-
-
-
-            return None
+        clean_chunk = chunk.strip()
+        # Not in message yet: accumulate until we see '<json_key>': (robust to split fragments)
+        if not self.in_message:
+            if clean_chunk == "{":
+                self.key_buffer = ""
+                self.accumulating = True
+                return None
             self.key_buffer += clean_chunk
+            if self.json_key in self.key_buffer and ":" in clean_chunk:
+                # Enter value mode; attempt to extract inline content if it exists in this same chunk
+                self.in_message = True
+                self.accumulating = False
+                # Try to find the first quote after the colon within the original (unstripped) chunk
+                s = chunk
+                colon_idx = s.find(":")
+                if colon_idx != -1:
+                    q_idx = s.find('"', colon_idx + 1)
+                    if q_idx != -1:
+                        self.message_started = True
+                        rem = s[q_idx + 1 :]
+                        # Check if this same chunk also contains the terminating quote (and optional delimiter)
+                        j = len(rem) - 1
+                        while j >= 0 and rem[j] in " \t\r\n":
+                            j -= 1
+                        if j >= 1 and rem[j - 1] == '"' and rem[j] in ",}]":
+                            out = rem[: j - 1]
+                            self.in_message = False
+                            self.message_started = False
+                            return out
+                        if j >= 0 and rem[j] == '"':
+                            out = rem[:j]
+                            self.in_message = False
+                            self.message_started = False
+                            return out
+                        # No terminator yet; emit remainder as content
+                        return rem
+                return None
+            if clean_chunk == "}":
+                self.in_message = False
+                self.message_started = False
+                self.key_buffer = ""
             return None

+        # Inside message value
         if self.in_message:
-
+            # Bare opening/closing quote tokens
+            if clean_chunk == '"' and self.message_started:
                 self.in_message = False
                 self.message_started = False
                 return None
-            if not self.message_started and
+            if not self.message_started and clean_chunk == '"':
                 self.message_started = True
                 return None
             if self.message_started:
-
+                # Detect closing patterns: '"', '",', '"}' (with optional whitespace)
+                i = len(chunk) - 1
+                while i >= 0 and chunk[i] in " \t\r\n":
+                    i -= 1
+                if i >= 1 and chunk[i - 1] == '"' and chunk[i] in ",}]":
+                    out = chunk[: i - 1]
                     self.in_message = False
-
+                    self.message_started = False
+                    return out
+                if i >= 0 and chunk[i] == '"':
+                    out = chunk[:i]
+                    self.in_message = False
+                    self.message_started = False
+                    return out
+                # Otherwise, still mid-string
                 return chunk

-        if
-            self.key_buffer = ""
-            self.accumulating = True
-            return None
-
-        if chunk.strip() == "}":
+        if clean_chunk == "}":
             self.in_message = False
             self.message_started = False
+            self.key_buffer = ""
             return None

         return None
+
+
+def sanitize_streamed_message_content(text: str) -> str:
+    """Remove trailing JSON delimiters that can leak into assistant text.
+
+    Specifically handles cases where a message string is immediately followed
+    by a JSON delimiter in the stream (e.g., '"', '",', '"}', '" ]').
+    Internal commas inside the message are preserved.
+    """
+    if not text:
+        return text
+    t = text.rstrip()
+    # strip trailing quote + delimiter
+    if len(t) >= 2 and t[-2] == '"' and t[-1] in ",}]":
+        return t[:-2]
+    # strip lone trailing quote
+    if t.endswith('"'):
+        return t[:-1]
+    return t
{letta_nightly-0.11.7.dev20250913103940.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/RECORD CHANGED

@@ -11,7 +11,7 @@ letta/memory.py,sha256=l5iNhLAR_xzgTb0GBlQx4SVgH8kuZh8siJdC_CFPKEs,4278
 letta/pytest.ini,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/settings.py,sha256=QEjNUwRXGBgsQpQAs2kksQmGN5CbxKlxPPydrklx_Ms,15011
 letta/streaming_interface.py,sha256=rPMfwUcjqITWk2tVqFQm1hmP99tU2IOHg9gU2dgPSo8,16400
-letta/streaming_utils.py,sha256=
+letta/streaming_utils.py,sha256=ZRFGFpQqn9ujCEbgZdLM7yTjiuNNvqQ47sNhV8ix-yQ,16553
 letta/system.py,sha256=kHF7n3Viq7gV5UIUEXixod2gWa2jroUgztpEzMC1Sew,8925
 letta/utils.py,sha256=bSq3St7MUw9gN1g0ICdOhNNaUFYBC3EfJLG6qsRLSFA,43290
 letta/adapters/letta_llm_adapter.py,sha256=11wkOkEQfPXUuJoJxbK22wCa-8gnWiDAb3UOXOxLt5U,3427

@@ -85,7 +85,7 @@ letta/humans/examples/cs_phd.txt,sha256=9C9ZAV_VuG7GB31ksy3-_NAyk8rjE6YtVOkhp08k
 letta/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/interfaces/anthropic_streaming_interface.py,sha256=0VyK8kTRgCLNDLQN6vX1gJ0dfJhqguL_NL1GYgFr6fU,25614
 letta/interfaces/openai_chat_completions_streaming_interface.py,sha256=3xHXh8cW79EkiMUTYfvcH_s92nkLjxXfvtVOVC3bfLo,5050
-letta/interfaces/openai_streaming_interface.py,sha256=
+letta/interfaces/openai_streaming_interface.py,sha256=YLArar2ypOEaVt7suJxpg1QZr0ErwEmPSEVhzaP6JWc,24166
 letta/interfaces/utils.py,sha256=c6jvO0dBYHh8DQnlN-B0qeNC64d3CSunhfqlFA4pJTY,278
 letta/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/jobs/helpers.py,sha256=kO4aj954xsQ1RAmkjY6LQQ7JEIGuhaxB1e9pzrYKHAY,914

@@ -340,7 +340,7 @@ letta/server/rest_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
 letta/server/rest_api/app.py,sha256=T3LLveXRJmfWqR0uEzoaLY8LXwYrwCQGb80XMbSCDUo,21172
 letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
 letta/server/rest_api/chat_completions_interface.py,sha256=-7wO7pNBWXMqblVkJpuZ8JPJ-LjudLTtT6BJu-q_XAM,11138
-letta/server/rest_api/interface.py,sha256=
+letta/server/rest_api/interface.py,sha256=_GQfKYUp9w4Wo2HSE_8Ff7QU16t1blspLaqmukpER9s,67099
 letta/server/rest_api/json_parser.py,sha256=yoakaCkSMdf0Y_pyILoFKZlvzXeqF-E1KNeHzatLMDc,9157
 letta/server/rest_api/redis_stream_manager.py,sha256=hz85CigFWdLkK1FWUmF-i6ObgoKkuoEgkiwshZ6QPKI,10764
 letta/server/rest_api/static_files.py,sha256=NG8sN4Z5EJ8JVQdj19tkFa9iQ1kBPTab9f_CUxd_u4Q,3143

@@ -470,8 +470,8 @@ letta/templates/sandbox_code_file_async.py.j2,sha256=lb7nh_P2W9VZHzU_9TxSCEMUod7
 letta/templates/summary_request_text.j2,sha256=ZttQwXonW2lk4pJLYzLK0pmo4EO4EtUUIXjgXKiizuc,842
 letta/templates/template_helper.py,sha256=HkG3zwRc5NVGmSTQu5PUTpz7LevK43bzXVaQuN8urf0,1634
 letta/types/__init__.py,sha256=hokKjCVFGEfR7SLMrtZsRsBfsC7yTIbgKPLdGg4K1eY,147
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
+letta_nightly-0.11.7.dev20250914103918.dist-info/METADATA,sha256=znAgbibaDvvLthC_McJ-W-HokPJdRIUijKN7KtgqoE0,24424
+letta_nightly-0.11.7.dev20250914103918.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+letta_nightly-0.11.7.dev20250914103918.dist-info/entry_points.txt,sha256=m-94Paj-kxiR6Ktu0us0_2qfhn29DzF2oVzqBE6cu8w,41
+letta_nightly-0.11.7.dev20250914103918.dist-info/licenses/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.11.7.dev20250914103918.dist-info/RECORD,,