letta-nightly 0.5.0.dev20241021104213__py3-none-any.whl → 0.5.0.dev20241022104124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/llm_api/helpers.py +45 -19
- letta/llm_api/openai.py +17 -4
- letta/server/rest_api/interface.py +208 -99
- letta/server/rest_api/routers/v1/agents.py +11 -3
- letta/streaming_utils.py +270 -0
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/METADATA +1 -1
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/RECORD +10 -9
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/LICENSE +0 -0
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/WHEEL +0 -0
- {letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/entry_points.txt +0 -0
letta/llm_api/helpers.py
CHANGED
@@ -1,6 +1,7 @@
 import copy
 import json
 import warnings
+from collections import OrderedDict
 from typing import Any, List, Union

 import requests
@@ -10,6 +11,30 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.utils import json_dumps, printd


+def convert_to_structured_output(openai_function: dict) -> dict:
+    """Convert function call objects to structured output objects
+
+    See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
+    """
+    structured_output = {
+        "name": openai_function["name"],
+        "description": openai_function["description"],
+        "strict": True,
+        "parameters": {"type": "object", "properties": {}, "additionalProperties": False, "required": []},
+    }
+
+    for param, details in openai_function["parameters"]["properties"].items():
+        structured_output["parameters"]["properties"][param] = {"type": details["type"], "description": details["description"]}
+
+        if "enum" in details:
+            structured_output["parameters"]["properties"][param]["enum"] = details["enum"]
+
+    # Add all properties to required list
+    structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys())
+
+    return structured_output
+
+
 def make_post_request(url: str, headers: dict[str, str], data: dict[str, Any]) -> dict[str, Any]:
     printd(f"Sending request to {url}")
     try:
@@ -78,33 +103,34 @@ def add_inner_thoughts_to_functions(
     inner_thoughts_key: str,
     inner_thoughts_description: str,
     inner_thoughts_required: bool = True,
-    # inner_thoughts_to_front: bool = True,  TODO support sorting somewhere, probably in the to_dict?
 ) -> List[dict]:
-    """Add an inner_thoughts kwarg to every function in the provided list"""
-    # return copies
+    """Add an inner_thoughts kwarg to every function in the provided list, ensuring it's the first parameter"""
     new_functions = []
-
-    # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
     for function_object in functions:
-
-        required_params = list(function_object["parameters"]["required"])
+        new_function_object = copy.deepcopy(function_object)

-        #
-
-
-
-
-
+        # Create a new OrderedDict with inner_thoughts as the first item
+        new_properties = OrderedDict()
+        new_properties[inner_thoughts_key] = {
+            "type": "string",
+            "description": inner_thoughts_description,
+        }

-        #
-
-
-
-
+        # Add the rest of the properties
+        new_properties.update(function_object["parameters"]["properties"])
+
+        # Cast OrderedDict back to a regular dict
+        new_function_object["parameters"]["properties"] = dict(new_properties)
+
+        # Update required parameters if necessary
+        if inner_thoughts_required:
+            required_params = new_function_object["parameters"].get("required", [])
+            if inner_thoughts_key not in required_params:
+                required_params.insert(0, inner_thoughts_key)
+            new_function_object["parameters"]["required"] = required_params

         new_functions.append(new_function_object)

-    # return a list of copies
     return new_functions

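To make the two helper changes above concrete, here is a minimal sketch of how they behave. The `send_message` schema is a hypothetical input invented for illustration, not one shipped in the package:

```python
from letta.llm_api.helpers import (
    add_inner_thoughts_to_functions,
    convert_to_structured_output,
)

# Hypothetical OpenAI-style function schema, for illustration only
send_message = {
    "name": "send_message",
    "description": "Send a message to the user",
    "parameters": {
        "type": "object",
        "properties": {
            "message": {"type": "string", "description": "Message contents"},
        },
        "required": [],
    },
}

# New helper: rewrites the schema for OpenAI structured outputs
structured = convert_to_structured_output(send_message)
assert structured["strict"] is True
assert structured["parameters"]["additionalProperties"] is False
assert structured["parameters"]["required"] == ["message"]  # every property becomes required

# Updated helper: inner_thoughts is now guaranteed to be the first property
[with_thoughts] = add_inner_thoughts_to_functions(
    functions=[send_message],
    inner_thoughts_key="inner_thoughts",
    inner_thoughts_description="Private reasoning (illustrative description)",
)
assert list(with_thoughts["parameters"]["properties"])[0] == "inner_thoughts"
assert with_thoughts["parameters"]["required"][0] == "inner_thoughts"
```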
letta/llm_api/openai.py
CHANGED
@@ -9,7 +9,11 @@ from httpx_sse._exceptions import SSEError

 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.errors import LLMError
-from letta.llm_api.helpers import add_inner_thoughts_to_functions, make_post_request
+from letta.llm_api.helpers import (
+    add_inner_thoughts_to_functions,
+    convert_to_structured_output,
+    make_post_request,
+)
 from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
@@ -112,7 +116,7 @@ def build_openai_chat_completions_request(
     use_tool_naming: bool,
     max_tokens: Optional[int],
 ) -> ChatCompletionRequest:
-    if llm_config.put_inner_thoughts_in_kwargs:
+    if functions and llm_config.put_inner_thoughts_in_kwargs:
         functions = add_inner_thoughts_to_functions(
             functions=functions,
             inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -154,8 +158,8 @@ def build_openai_chat_completions_request(
     )
     # https://platform.openai.com/docs/guides/text-generation/json-mode
     # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
-    if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
-        data.response_format = {"type": "json_object"}
+    # if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
+    #     data.response_format = {"type": "json_object"}

     if "inference.memgpt.ai" in llm_config.model_endpoint:
         # override user id for inference.memgpt.ai
@@ -362,6 +366,8 @@ def openai_chat_completions_process_stream(
     chat_completion_response.usage.completion_tokens = n_chunks
     chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks

+    assert len(chat_completion_response.choices) > 0, chat_completion_response
+
     # printd(chat_completion_response)
     return chat_completion_response

@@ -461,6 +467,13 @@ def openai_chat_completions_request_stream(
         data.pop("tools")
         data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")

+    if "tools" in data:
+        for tool in data["tools"]:
+            # tool["strict"] = True
+            tool["function"] = convert_to_structured_output(tool["function"])
+
+    # print(f"\n\n\n\nData[tools]: {json.dumps(data['tools'], indent=2)}")
+
     printd(f"Sending request to {url}")
     try:
         return _sse_post(url=url, data=data, headers=headers)

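As a rough illustration of the new streaming-request behavior, each tool's `function` entry is rewritten through `convert_to_structured_output` before the SSE POST is sent. The payload below is hypothetical and only mirrors the shape of the loop added above:

```python
from letta.llm_api.helpers import convert_to_structured_output

# Hypothetical payload; `send_message` is the illustrative schema from the sketch above
data = {
    "model": "gpt-4o-mini",
    "messages": [{"role": "user", "content": "hi"}],
    "tools": [{"type": "function", "function": send_message}],
}

# Same shape as the loop added to openai_chat_completions_request_stream
if "tools" in data:
    for tool in data["tools"]:
        tool["function"] = convert_to_structured_output(tool["function"])

# Every tool now carries strict=True and additionalProperties=False,
# the constraints OpenAI's structured-outputs mode enforces
```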
letta/server/rest_api/interface.py
CHANGED

@@ -8,6 +8,7 @@ from typing import AsyncGenerator, Literal, Optional, Union

 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.interface import AgentInterface
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import (
     AssistantMessage,
@@ -23,9 +24,14 @@ from letta.schemas.letta_message import (
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
 from letta.streaming_interface import AgentChunkStreamingInterface
+from letta.streaming_utils import (
+    FunctionArgumentsStreamHandler,
+    JSONInnerThoughtsExtractor,
+)
 from letta.utils import is_utc_datetime


+# TODO strip from code / deprecate
 class QueuingInterface(AgentInterface):
     """Messages are queued inside an internal buffer and manually flushed"""

@@ -248,58 +254,6 @@ class QueuingInterface(AgentInterface):
         self._queue_push(message_api=new_message, message_obj=msg_obj)


-class FunctionArgumentsStreamHandler:
-    """State machine that can process a stream of"""
-
-    def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
-        self.json_key = json_key
-        self.reset()
-
-    def reset(self):
-        self.in_message = False
-        self.key_buffer = ""
-        self.accumulating = False
-        self.message_started = False
-
-    def process_json_chunk(self, chunk: str) -> Optional[str]:
-        """Process a chunk from the function arguments and return the plaintext version"""
-
-        # Use strip to handle only leading and trailing whitespace in control structures
-        if self.accumulating:
-            clean_chunk = chunk.strip()
-            if self.json_key in self.key_buffer:
-                if ":" in clean_chunk:
-                    self.in_message = True
-                    self.accumulating = False
-                return None
-            self.key_buffer += clean_chunk
-            return None
-
-        if self.in_message:
-            if chunk.strip() == '"' and self.message_started:
-                self.in_message = False
-                self.message_started = False
-                return None
-            if not self.message_started and chunk.strip() == '"':
-                self.message_started = True
-                return None
-            if self.message_started:
-                if chunk.strip().endswith('"'):
-                    self.in_message = False
-                    return chunk.rstrip('"\n')
-                return chunk
-
-        if chunk.strip() == "{":
-            self.key_buffer = ""
-            self.accumulating = True
-            return None
-        if chunk.strip() == "}":
-            self.in_message = False
-            self.message_started = False
-            return None
-        return None
-
-
 class StreamingServerInterface(AgentChunkStreamingInterface):
     """Maintain a generator that is a proxy for self.process_chunk()

@@ -316,9 +270,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def __init__(
         self,
         multi_step=True,
+        # Related to if we want to try and pass back the AssistantMessage as a special case function
         use_assistant_message=False,
         assistant_message_function_name=DEFAULT_MESSAGE_TOOL,
         assistant_message_function_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
+        # Related to if we expect inner_thoughts to be in the kwargs
+        inner_thoughts_in_kwargs=True,
+        inner_thoughts_kwarg=INNER_THOUGHTS_KWARG,
     ):
         # If streaming mode, ignores base interface calls like .assistant_message, etc
         self.streaming_mode = False
@@ -346,6 +304,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.assistant_message_function_name = assistant_message_function_name
         self.assistant_message_function_kwarg = assistant_message_function_kwarg

+        # Support for inner_thoughts_in_kwargs
+        self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs
+        self.inner_thoughts_kwarg = inner_thoughts_kwarg
+        # A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
+        self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=inner_thoughts_kwarg, wait_for_first_key=True)
+        # Two buffers used to make sure that the 'name' comes after the inner thoughts stream (if inner_thoughts_in_kwargs)
+        self.function_name_buffer = None
+        self.function_args_buffer = None
+
         # extra prints
         self.debug = False
         self.timeout = 30
@@ -365,16 +332,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # Reset the event until a new item is pushed
         self._event.clear()

-        # while self._active:
-        #     # Wait until there is an item in the deque or the stream is deactivated
-        #     await self._event.wait()
-
-        #     while self._chunks:
-        #         yield self._chunks.popleft()
-
-        #     # Reset the event until a new item is pushed
-        #     self._event.clear()
-
     def get_generator(self) -> AsyncGenerator:
         """Get the generator that yields processed chunks."""
         if not self._active:
@@ -419,18 +376,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
             self._push_to_buffer(self.multi_step_gen_indicator)

-        # self._active = False
-        # self._event.set()  # Unblock the generator if it's waiting to allow it to complete
-
-        # if not self.multi_step:
-        #     # end the stream
-        #     self._active = False
-        #     self._event.set()  # Unblock the generator if it's waiting to allow it to complete
-        # else:
-        #     # signal that a new step has started in the stream
-        #     self._chunks.append(self.multi_step_indicator)
-        #     self._event.set()  # Signal that new data is available
-
     def step_complete(self):
         """Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
         if not self.multi_step:
@@ -443,8 +388,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):

     def step_yield(self):
         """If multi_step, this is the true 'stream_end' function."""
-        # if self.multi_step:
-        # end the stream
         self._active = False
         self._event.set()  # Unblock the generator if it's waiting to allow it to complete

@@ -479,8 +422,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
             tool_call = message_delta.tool_calls[0]

+            # TODO(charles) merge into logic for internal_monologue
             # special case for trapping `send_message`
             if self.use_assistant_message and tool_call.function:
+                if self.inner_thoughts_in_kwargs:
+                    raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")

                 # If we just received a chunk with the message in it, we either enter "send_message" mode, or we do standard FunctionCallMessage passthrough mode

@@ -538,6 +484,181 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     ),
                 )

+            elif self.inner_thoughts_in_kwargs and tool_call.function:
+                if self.use_assistant_message:
+                    raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
+
+                processed_chunk = None
+
+                if tool_call.function.name:
+                    # If we're waiting for the first key, then we should hold back the name
+                    # ie add it to a buffer instead of returning it as a chunk
+                    if self.function_name_buffer is None:
+                        self.function_name_buffer = tool_call.function.name
+                    else:
+                        self.function_name_buffer += tool_call.function.name
+
+                if tool_call.function.arguments:
+                    updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+
+                    # If we have inner thoughts, we should output them as a chunk
+                    if updates_inner_thoughts:
+                        processed_chunk = InternalMonologue(
+                            id=message_id,
+                            date=message_date,
+                            internal_monologue=updates_inner_thoughts,
+                        )
+                        # Additionally inner thoughts may stream back with a chunk of main JSON
+                        # In that case, since we can only return a chunk at a time, we should buffer it
+                        if updates_main_json:
+                            if self.function_args_buffer is None:
+                                self.function_args_buffer = updates_main_json
+                            else:
+                                self.function_args_buffer += updates_main_json
+
+                    # If we have main_json, we should output a FunctionCallMessage
+                    elif updates_main_json:
+                        # If there's something in the function_name buffer, we should release it first
+                        # NOTE: we could output it as part of a chunk that has both name and args,
+                        #       however the frontend may expect name first, then args, so to be
+                        #       safe we'll output name first in a separate chunk
+                        if self.function_name_buffer:
+                            processed_chunk = FunctionCallMessage(
+                                id=message_id,
+                                date=message_date,
+                                function_call=FunctionCallDelta(name=self.function_name_buffer, arguments=None),
+                            )
+                            # Clear the buffer
+                            self.function_name_buffer = None
+                            # Since we're clearing the name buffer, we should store
+                            # any updates to the arguments inside a separate buffer
+                            if updates_main_json:
+                                # Add any main_json updates to the arguments buffer
+                                if self.function_args_buffer is None:
+                                    self.function_args_buffer = updates_main_json
+                                else:
+                                    self.function_args_buffer += updates_main_json
+
+                        # If there was nothing in the name buffer, we can proceed to
+                        # output the arguments chunk as a FunctionCallMessage
+                        else:
+                            # There may be a buffer from a previous chunk, for example
+                            # if the previous chunk had arguments but we needed to flush name
+                            if self.function_args_buffer:
+                                # In this case, we should release the buffer + new data at once
+                                combined_chunk = self.function_args_buffer + updates_main_json
+                                processed_chunk = FunctionCallMessage(
+                                    id=message_id,
+                                    date=message_date,
+                                    function_call=FunctionCallDelta(name=None, arguments=combined_chunk),
+                                )
+                                # clear buffer
+                                self.function_args_buffer = None
+                            else:
+                                # If there's no buffer to clear, just output a new chunk with new data
+                                processed_chunk = FunctionCallMessage(
+                                    id=message_id,
+                                    date=message_date,
+                                    function_call=FunctionCallDelta(name=None, arguments=updates_main_json),
+                                )
+
+                    # # If there's something in the main_json buffer, we should add if to the arguments and release it together
+                    # tool_call_delta = {}
+                    # if tool_call.id:
+                    #     tool_call_delta["id"] = tool_call.id
+                    # if tool_call.function:
+                    #     if tool_call.function.arguments:
+                    #         # tool_call_delta["arguments"] = tool_call.function.arguments
+                    #         # NOTE: using the stripped one
+                    #         tool_call_delta["arguments"] = updates_main_json
+                    #     # We use the buffered name
+                    #     if self.function_name_buffer:
+                    #         tool_call_delta["name"] = self.function_name_buffer
+                    #     # if tool_call.function.name:
+                    #     #     tool_call_delta["name"] = tool_call.function.name

+                    # processed_chunk = FunctionCallMessage(
+                    #     id=message_id,
+                    #     date=message_date,
+                    #     function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
+                    # )
+
+                else:
+                    processed_chunk = None
+
+                return processed_chunk
+
+                # # NOTE: this is a simplified version of the parsing code that:
+                # # (1) assumes that the inner_thoughts key will always come first
+                # # (2) assumes that there's no extra spaces in the stringified JSON
+                # #     i.e., the prefix will look exactly like: "{\"variable\":\"}"
+                # if tool_call.function.arguments:
+                #     self.function_args_buffer += tool_call.function.arguments
+
+                #     # prefix_str = f'{{"\\"{self.inner_thoughts_kwarg}\\":\\"}}'
+                #     prefix_str = f'{{"{self.inner_thoughts_kwarg}":'
+                #     if self.function_args_buffer.startswith(prefix_str):
+                #         print(f"Found prefix!!!: {self.function_args_buffer}")
+                #     else:
+                #         print(f"No prefix found: {self.function_args_buffer}")
+
+                # tool_call_delta = {}
+                # if tool_call.id:
+                #     tool_call_delta["id"] = tool_call.id
+                # if tool_call.function:
+                #     if tool_call.function.arguments:
+                #         tool_call_delta["arguments"] = tool_call.function.arguments
+                #     if tool_call.function.name:
+                #         tool_call_delta["name"] = tool_call.function.name
+
+                # processed_chunk = FunctionCallMessage(
+                #     id=message_id,
+                #     date=message_date,
+                #     function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
+                # )
+
+            # elif False and self.inner_thoughts_in_kwargs and tool_call.function:
+            #     if self.use_assistant_message:
+            #         raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
+
+            #     if tool_call.function.arguments:
+
+            #         # Maintain a state machine to track if we're reading a key vs reading a value
+            #         # Technically we can we pre-key, post-key, pre-value, post-value
+
+            #         # for c in tool_call.function.arguments:
+            #         #     if self.function_chunks_parsing_state == FunctionChunksParsingState.PRE_KEY:
+            #         #         if c == '"':
+            #         #             self.function_chunks_parsing_state = FunctionChunksParsingState.READING_KEY
+            #         #     elif self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
+            #         #         if c == '"':
+            #         #             self.function_chunks_parsing_state = FunctionChunksParsingState.POST_KEY
+
+            #         # If we're reading a key:
+            #         # if self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
+
+            #         # We need to buffer the function arguments until we get complete keys
+            #         # We are reading stringified-JSON, so we need to check for keys in data that looks like:
+            #         # "arguments":"{\""
+            #         # "arguments":"inner"
+            #         # "arguments":"_th"
+            #         # "arguments":"ought"
+            #         # "arguments":"s"
+            #         # "arguments":"\":\""
+
+            #         # Once we get a complete key, check if the key matches
+
+            #         # If it does match, start processing the value (stringified-JSON string
+            #         # And with each new chunk, output it as a chunk of type InternalMonologue
+
+            #         # If the key doesn't match, then flush the buffer as a single FunctionCallMessage chunk
+
+            #         # If we're reading a value
+
+            #         # If we're reading the inner thoughts value, we output chunks of type InternalMonologue
+
+            #         # Otherwise, do simple chunks of FunctionCallMessage
+
             else:

                 tool_call_delta = {}
@@ -563,7 +684,14 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # skip if there's a finish
             return None
         else:
-
+            # Example case that would trigger here:
+            # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
+            # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
+            # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
+            # model='gpt-4o-mini-2024-07-18'
+            # object='chat.completion.chunk'
+            warnings.warn(f"Couldn't find delta in chunk: {chunk}")
+            return None

         return processed_chunk

@@ -663,6 +791,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             #     "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
             #     "id": str(msg_obj.id) if msg_obj is not None else None,
             # }
+            assert msg_obj is not None, "Internal monologue requires msg_obj references for metadata"
             processed_chunk = InternalMonologue(
                 id=msg_obj.id,
                 date=msg_obj.created_at,
@@ -676,18 +805,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
         """Letta uses send_message"""

-        #
-
-        # # create a fake "chunk" of a stream
-        # processed_chunk = {
-        #     "assistant_message": msg,
-        #     "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
-        #     "id": str(msg_obj.id) if msg_obj is not None else None,
-        # }
-
-        # self._chunks.append(processed_chunk)
-        # self._event.set()  # Signal that new data is available
-
+        # NOTE: this is a no-op, we handle this special case in function_message instead
         return

     def function_message(self, msg: str, msg_obj: Optional[Message] = None):
@@ -699,6 +817,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         if msg.startswith("Running "):
             if not self.streaming_mode:
                 # create a fake "chunk" of a stream
+                assert msg_obj.tool_calls is not None and len(msg_obj.tool_calls) > 0, "Function call required for function_message"
                 function_call = msg_obj.tool_calls[0]

                 if self.nonstreaming_legacy_mode:
@@ -784,13 +903,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 return
             else:
                 return
-            # msg = msg.replace("Running ", "")
-            # new_message = {"function_call": msg}

         elif msg.startswith("Ran "):
             return
-            # msg = msg.replace("Ran ", "Function call returned: ")
-            # new_message = {"function_call": msg}

         elif msg.startswith("Success: "):
             msg = msg.replace("Success: ", "")
@@ -821,10 +936,4 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             raise ValueError(msg)
         new_message = {"function_message": msg}

-        # add extra metadata
-        # if msg_obj is not None:
-        #     new_message["id"] = str(msg_obj.id)
-        #     assert is_utc_datetime(msg_obj.created_at), msg_obj.created_at
-        #     new_message["date"] = msg_obj.created_at.isoformat()
-
         self._push_to_buffer(new_message)

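Taken together, the interface changes above amount to an ordering guarantee for stream consumers. The sequence below is illustrative (not captured output), assuming `inner_thoughts_in_kwargs=True` and a tool call whose full arguments JSON is `{"inner_thoughts":"User said hi","message":"Hello!"}`:

```python
# Illustrative consumer-visible chunk order; actual chunk boundaries depend on
# how the model fragments the argument stream.
#
# 1. InternalMonologue(internal_monologue="User said hi")
#    # inner thoughts stream out first, as the value arrives
# 2. FunctionCallMessage(function_call=FunctionCallDelta(name="send_message", arguments=None))
#    # the buffered name is flushed only once the first non-inner_thoughts key appears
# 3. FunctionCallMessage(function_call=FunctionCallDelta(name=None, arguments='{"message":"Hello!"}'))
#    # the inner_thoughts key/value has been stripped from the argument JSON
```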
letta/server/rest_api/routers/v1/agents.py
CHANGED

@@ -430,9 +430,6 @@ async def send_message_to_agent(
     # Get the generator object off of the agent's streaming interface
     # This will be attached to the POST SSE request used under-the-hood
     letta_agent = server._get_or_load_agent(agent_id=agent_id)
-    streaming_interface = letta_agent.interface
-    if not isinstance(streaming_interface, StreamingServerInterface):
-        raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")

     # Disable token streaming if not OpenAI
     # TODO: cleanup this logic
@@ -441,6 +438,12 @@ async def send_message_to_agent(
         print("Warning: token streaming is only supported for OpenAI models. Setting to False.")
         stream_tokens = False

+    # Create a new interface per request
+    letta_agent.interface = StreamingServerInterface()
+    streaming_interface = letta_agent.interface
+    if not isinstance(streaming_interface, StreamingServerInterface):
+        raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
+
     # Enable token-streaming within the request if desired
     streaming_interface.streaming_mode = stream_tokens
     # "chatcompletion mode" does some remapping and ignores inner thoughts
@@ -454,6 +457,11 @@ async def send_message_to_agent(
     streaming_interface.assistant_message_function_name = assistant_message_function_name
     streaming_interface.assistant_message_function_kwarg = assistant_message_function_kwarg

+    # Related to JSON buffer reader
+    streaming_interface.inner_thoughts_in_kwargs = (
+        llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
+    )
+
     # Offload the synchronous message_func to a separate thread
     streaming_interface.stream_start()
     task = asyncio.create_task(

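A condensed view of the new per-request wiring, with the apparent rationale hedged: allocating a fresh `StreamingServerInterface` per request means parser state such as `function_name_buffer` and the `JSONInnerThoughtsExtractor` buffers cannot leak across requests (an inference from the diff, not a stated design note):

```python
# Condensed from the diff above; server, letta_agent, llm_config, and
# stream_tokens are assumed to be in scope as in send_message_to_agent
letta_agent = server._get_or_load_agent(agent_id=agent_id)

# Fresh interface per request, so buffered parser state never carries over
letta_agent.interface = StreamingServerInterface()
streaming_interface = letta_agent.interface

streaming_interface.streaming_mode = stream_tokens
# Tell the JSON buffer reader whether to expect inner_thoughts in the kwargs
streaming_interface.inner_thoughts_in_kwargs = (
    llm_config.put_inner_thoughts_in_kwargs
    if llm_config.put_inner_thoughts_in_kwargs is not None
    else False
)
```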
letta/streaming_utils.py
ADDED
@@ -0,0 +1,270 @@
+from typing import Optional
+
+from letta.constants import DEFAULT_MESSAGE_TOOL_KWARG
+
+
+class JSONInnerThoughtsExtractor:
+    """
+    A class to process incoming JSON fragments and extract 'inner_thoughts' separately from the main JSON.
+
+    This handler processes JSON fragments incrementally, parsing out the value associated with a specified key (default is 'inner_thoughts'). It maintains two separate buffers:
+
+    - `main_json`: Accumulates the JSON data excluding the 'inner_thoughts' key-value pair.
+    - `inner_thoughts`: Accumulates the value associated with the 'inner_thoughts' key.
+
+    **Parameters:**
+
+    - `inner_thoughts_key` (str): The key to extract from the JSON (default is 'inner_thoughts').
+    - `wait_for_first_key` (bool): If `True`, holds back main JSON output until after the 'inner_thoughts' value is processed.
+
+    **Functionality:**
+
+    - **Stateful Parsing:** Maintains parsing state across fragments.
+    - **String Handling:** Correctly processes strings, escape sequences, and quotation marks.
+    - **Selective Extraction:** Identifies and extracts the value of the specified key.
+    - **Fragment Processing:** Handles data that arrives in chunks.
+
+    **Usage:**
+
+    ```python
+    extractor = JSONInnerThoughtsExtractor(wait_for_first_key=True)
+    for fragment in fragments:
+        updates_main_json, updates_inner_thoughts = extractor.process_fragment(fragment)
+    ```
+
+    """
+
+    def __init__(self, inner_thoughts_key="inner_thoughts", wait_for_first_key=False):
+        self.inner_thoughts_key = inner_thoughts_key
+        self.wait_for_first_key = wait_for_first_key
+        self.main_buffer = ""
+        self.inner_thoughts_buffer = ""
+        self.state = "start"  # Possible states: start, key, colon, value, comma_or_end, end
+        self.in_string = False
+        self.escaped = False
+        self.current_key = ""
+        self.is_inner_thoughts_value = False
+        self.inner_thoughts_processed = False
+        self.hold_main_json = wait_for_first_key
+        self.main_json_held_buffer = ""
+
+    def process_fragment(self, fragment):
+        updates_main_json = ""
+        updates_inner_thoughts = ""
+        i = 0
+        while i < len(fragment):
+            c = fragment[i]
+            if self.escaped:
+                self.escaped = False
+                if self.in_string:
+                    if self.state == "key":
+                        self.current_key += c
+                    elif self.state == "value":
+                        if self.is_inner_thoughts_value:
+                            updates_inner_thoughts += c
+                            self.inner_thoughts_buffer += c
+                        else:
+                            if self.hold_main_json:
+                                self.main_json_held_buffer += c
+                            else:
+                                updates_main_json += c
+                                self.main_buffer += c
+                else:
+                    if not self.is_inner_thoughts_value:
+                        if self.hold_main_json:
+                            self.main_json_held_buffer += c
+                        else:
+                            updates_main_json += c
+                            self.main_buffer += c
+            elif c == "\\":
+                self.escaped = True
+                if self.in_string:
+                    if self.state == "key":
+                        self.current_key += c
+                    elif self.state == "value":
+                        if self.is_inner_thoughts_value:
+                            updates_inner_thoughts += c
+                            self.inner_thoughts_buffer += c
+                        else:
+                            if self.hold_main_json:
+                                self.main_json_held_buffer += c
+                            else:
+                                updates_main_json += c
+                                self.main_buffer += c
+                else:
+                    if not self.is_inner_thoughts_value:
+                        if self.hold_main_json:
+                            self.main_json_held_buffer += c
+                        else:
+                            updates_main_json += c
+                            self.main_buffer += c
+            elif c == '"':
+                if not self.escaped:
+                    self.in_string = not self.in_string
+                    if self.in_string:
+                        if self.state in ["start", "comma_or_end"]:
+                            self.state = "key"
+                            self.current_key = ""
+                            # Release held main_json when starting to process the next key
+                            if self.wait_for_first_key and self.hold_main_json and self.inner_thoughts_processed:
+                                updates_main_json += self.main_json_held_buffer
+                                self.main_buffer += self.main_json_held_buffer
+                                self.main_json_held_buffer = ""
+                                self.hold_main_json = False
+                    else:
+                        if self.state == "key":
+                            self.state = "colon"
+                        elif self.state == "value":
+                            # End of value
+                            if self.is_inner_thoughts_value:
+                                self.inner_thoughts_processed = True
+                                # Do not release held main_json here
+                            else:
+                                if self.hold_main_json:
+                                    self.main_json_held_buffer += '"'
+                                else:
+                                    updates_main_json += '"'
+                                    self.main_buffer += '"'
+                            self.state = "comma_or_end"
+                else:
+                    self.escaped = False
+                    if self.in_string:
+                        if self.state == "key":
+                            self.current_key += '"'
+                        elif self.state == "value":
+                            if self.is_inner_thoughts_value:
+                                updates_inner_thoughts += '"'
+                                self.inner_thoughts_buffer += '"'
+                            else:
+                                if self.hold_main_json:
+                                    self.main_json_held_buffer += '"'
+                                else:
+                                    updates_main_json += '"'
+                                    self.main_buffer += '"'
+            elif self.in_string:
+                if self.state == "key":
+                    self.current_key += c
+                elif self.state == "value":
+                    if self.is_inner_thoughts_value:
+                        updates_inner_thoughts += c
+                        self.inner_thoughts_buffer += c
+                    else:
+                        if self.hold_main_json:
+                            self.main_json_held_buffer += c
+                        else:
+                            updates_main_json += c
+                            self.main_buffer += c
+            else:
+                if c == ":" and self.state == "colon":
+                    self.state = "value"
+                    self.is_inner_thoughts_value = self.current_key == self.inner_thoughts_key
+                    if self.is_inner_thoughts_value:
+                        pass  # Do not include 'inner_thoughts' key in main_json
+                    else:
+                        key_colon = f'"{self.current_key}":'
+                        if self.hold_main_json:
+                            self.main_json_held_buffer += key_colon + '"'
+                        else:
+                            updates_main_json += key_colon + '"'
+                            self.main_buffer += key_colon + '"'
+                elif c == "," and self.state == "comma_or_end":
+                    if self.is_inner_thoughts_value:
+                        # Inner thoughts value ended
+                        self.is_inner_thoughts_value = False
+                        self.state = "start"
+                        # Do not release held main_json here
+                    else:
+                        if self.hold_main_json:
+                            self.main_json_held_buffer += c
+                        else:
+                            updates_main_json += c
+                            self.main_buffer += c
+                        self.state = "start"
+                elif c == "{":
+                    if not self.is_inner_thoughts_value:
+                        if self.hold_main_json:
+                            self.main_json_held_buffer += c
+                        else:
+                            updates_main_json += c
+                            self.main_buffer += c
+                elif c == "}":
+                    self.state = "end"
+                    if self.hold_main_json:
+                        self.main_json_held_buffer += c
+                    else:
+                        updates_main_json += c
+                        self.main_buffer += c
+                else:
+                    if self.state == "value":
+                        if self.is_inner_thoughts_value:
+                            updates_inner_thoughts += c
+                            self.inner_thoughts_buffer += c
+                        else:
+                            if self.hold_main_json:
+                                self.main_json_held_buffer += c
+                            else:
+                                updates_main_json += c
+                                self.main_buffer += c
+            i += 1
+
+        return updates_main_json, updates_inner_thoughts
+
+    @property
+    def main_json(self):
+        return self.main_buffer
+
+    @property
+    def inner_thoughts(self):
+        return self.inner_thoughts_buffer
+
+
+class FunctionArgumentsStreamHandler:
+    """State machine that can process a stream of"""
+
+    def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
+        self.json_key = json_key
+        self.reset()
+
+    def reset(self):
+        self.in_message = False
+        self.key_buffer = ""
+        self.accumulating = False
+        self.message_started = False
+
+    def process_json_chunk(self, chunk: str) -> Optional[str]:
+        """Process a chunk from the function arguments and return the plaintext version"""
+
+        # Use strip to handle only leading and trailing whitespace in control structures
+        if self.accumulating:
+            clean_chunk = chunk.strip()
+            if self.json_key in self.key_buffer:
+                if ":" in clean_chunk:
+                    self.in_message = True
+                    self.accumulating = False
+                return None
+            self.key_buffer += clean_chunk
+            return None
+
+        if self.in_message:
+            if chunk.strip() == '"' and self.message_started:
+                self.in_message = False
+                self.message_started = False
+                return None
+            if not self.message_started and chunk.strip() == '"':
+                self.message_started = True
+                return None
+            if self.message_started:
+                if chunk.strip().endswith('"'):
+                    self.in_message = False
+                    return chunk.rstrip('"\n')
+                return chunk
+
+        if chunk.strip() == "{":
+            self.key_buffer = ""
+            self.accumulating = True
+            return None
+        if chunk.strip() == "}":
+            self.in_message = False
+            self.message_started = False
+            return None
+        return None

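A worked example of the new extractor, assuming the fragment boundaries shown (any split behaves the same way, since parsing state carries across fragments):

```python
from letta.streaming_utils import JSONInnerThoughtsExtractor

extractor = JSONInnerThoughtsExtractor(wait_for_first_key=True)

# Arguments JSON arriving in arbitrary fragments, as an OpenAI stream would deliver it
fragments = ['{"inner', '_thoughts":"User greeted', ' me","mess', 'age":"Hello!"}']

for fragment in fragments:
    updates_main_json, updates_inner_thoughts = extractor.process_fragment(fragment)
    if updates_inner_thoughts:
        print("thoughts +=", repr(updates_inner_thoughts))
    if updates_main_json:
        print("json     +=", repr(updates_main_json))

print(extractor.inner_thoughts)  # 'User greeted me'
print(extractor.main_json)       # '{"message":"Hello!"}'
```

With `wait_for_first_key=True`, the opening `{` is held back and only released once the first non-inner_thoughts key begins, which is what lets the streaming interface emit the function name before any argument JSON.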
{letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/RECORD
CHANGED

@@ -41,10 +41,10 @@ letta/llm_api/azure_openai.py,sha256=C-fuuholudcLJDWjqnXJwpXsfmGWfNugEVWyj6YCrpg
 letta/llm_api/azure_openai_constants.py,sha256=oXtKrgBFHf744gyt5l1thILXgyi8NDNUrKEa2GGGpjw,278
 letta/llm_api/cohere.py,sha256=vDRd-SUGp1t_JUIdwC3RkIhwMl0OY7n-tAU9uPORYkY,14826
 letta/llm_api/google_ai.py,sha256=3xZ074nSOCC22c15yerA5ngWzh0ex4wxeI-6faNbHPE,17708
-letta/llm_api/helpers.py,sha256=
+letta/llm_api/helpers.py,sha256=sGCmNA1U_7-AhRFgvT668jdp_xyzSliKQYbTvRR6O7c,9812
 letta/llm_api/llm_api_tools.py,sha256=GEBO7Dlt7xtAQud1sVsigKZKPpLOZOt2IWL8LwcNV4o,14869
 letta/llm_api/mistral.py,sha256=fHdfD9ug-rQIk2qn8tRKay1U6w9maF11ryhKi91FfXM,1593
-letta/llm_api/openai.py,sha256=
+letta/llm_api/openai.py,sha256=_kztTpd7IR12bRjvMT1n8dvy5bINvgJ3od65HBQunVk,23212
 letta/local_llm/README.md,sha256=hFJyw5B0TU2jrh9nb0zGZMgdH-Ei1dSRfhvPQG_NSoU,168
 letta/local_llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/local_llm/chat_completion_proxy.py,sha256=SiohxsjGTku4vOryOZx7I0t0xoO_sUuhXgoe62fKq3c,12995
@@ -151,7 +151,7 @@ letta/server/rest_api/app.py,sha256=JNmDnvp9fP--hJPtPpEWgQT-14O1YOceZbWELr2vedA,
 letta/server/rest_api/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/auth/index.py,sha256=fQBGyVylGSRfEMLQ17cZzrHd5Y1xiVylvPqH5Rl-lXQ,1378
 letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
-letta/server/rest_api/interface.py,sha256=
+letta/server/rest_api/interface.py,sha256=Km1nJSN8Ogyj5P-DmDRyR5fEov-G0o9KEIXAlsqqb5A,44525
 letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/routers/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/routers/openai/assistants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -161,7 +161,7 @@ letta/server/rest_api/routers/openai/assistants/threads.py,sha256=WXVGBaBvSNPB7Z
 letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256=-uye6cm4SnoQGwxhr1N1FrSXOlnO2Hvbfj6k8JSc45k,4918
 letta/server/rest_api/routers/v1/__init__.py,sha256=sqlVZa-u9DJwdRsp0_8YUGrac9DHguIB4wETlEDRylA,666
-letta/server/rest_api/routers/v1/agents.py,sha256=
+letta/server/rest_api/routers/v1/agents.py,sha256=BY4rQOcwsi_WiWh6DwrO8Vz6Nu2JgMBKSHxiBqlaaYY,25397
 letta/server/rest_api/routers/v1/blocks.py,sha256=0WekE_yBD2U3jYgPxI0DCFjACWavCAlvm_Ybw5SZBnw,2583
 letta/server/rest_api/routers/v1/health.py,sha256=pKCuVESlVOhGIb4VC4K-H82eZqfghmT6kvj2iOkkKuc,401
 letta/server/rest_api/routers/v1/jobs.py,sha256=a-j0v-5A0un0pVCOHpfeWnzpOWkVDQO6ti42k_qAlZY,2272
@@ -187,10 +187,11 @@ letta/server/ws_api/protocol.py,sha256=M_-gM5iuDBwa1cuN2IGNCG5GxMJwU2d3XW93XALv9
 letta/server/ws_api/server.py,sha256=C2Kv48PCwl46DQFb0ZP30s86KJLQ6dZk2AhWQEZn9pY,6004
 letta/settings.py,sha256=gNdH-Ty6f-Nfz2j9ZMZFRQHac2KzgsxLZNt5l_TiAyo,3301
 letta/streaming_interface.py,sha256=_FPUWy58j50evHcpXyd7zB1wWqeCc71NCFeWh_TBvnw,15736
+letta/streaming_utils.py,sha256=329fsvj1ZN0r0LpQtmMPZ2vSxkDBIUUwvGHZFkjm2I8,11745
 letta/system.py,sha256=buKYPqG5n2x41hVmWpu6JUpyd7vTWED9Km2_M7dLrvk,6960
 letta/utils.py,sha256=SXLEYhyp3gHyIjrxNIKNZZ5ittKo3KOj6zxgC_Trex0,31012
-letta_nightly-0.5.0.
-letta_nightly-0.5.0.
-letta_nightly-0.5.0.
-letta_nightly-0.5.0.
-letta_nightly-0.5.0.
+letta_nightly-0.5.0.dev20241022104124.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.5.0.dev20241022104124.dist-info/METADATA,sha256=v5hk_4eSmZRsN51JSel2I1mXzkM4XCHiAJrBF7fPC-Y,10620
+letta_nightly-0.5.0.dev20241022104124.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+letta_nightly-0.5.0.dev20241022104124.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
+letta_nightly-0.5.0.dev20241022104124.dist-info/RECORD,,
{letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/LICENSE
File without changes

{letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/WHEEL
File without changes

{letta_nightly-0.5.0.dev20241021104213.dist-info → letta_nightly-0.5.0.dev20241022104124.dist-info}/entry_points.txt
File without changes