letta-nightly 0.6.16.dev20250129104019__py3-none-any.whl → 0.6.17.dev20250129174639__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (35)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +0 -3
  3. letta/client/client.py +5 -5
  4. letta/client/streaming.py +29 -20
  5. letta/constants.py +1 -1
  6. letta/functions/function_sets/multi_agent.py +55 -49
  7. letta/functions/functions.py +0 -1
  8. letta/functions/helpers.py +149 -9
  9. letta/llm_api/llm_api_tools.py +20 -12
  10. letta/llm_api/openai.py +15 -13
  11. letta/orm/agent.py +14 -2
  12. letta/orm/job.py +1 -1
  13. letta/orm/sqlalchemy_base.py +12 -4
  14. letta/schemas/job.py +17 -1
  15. letta/schemas/letta_request.py +2 -7
  16. letta/schemas/llm_config.py +9 -0
  17. letta/schemas/message.py +51 -22
  18. letta/schemas/openai/chat_completion_response.py +2 -2
  19. letta/schemas/run.py +1 -2
  20. letta/server/rest_api/app.py +5 -1
  21. letta/server/rest_api/chat_completions_interface.py +256 -0
  22. letta/server/rest_api/optimistic_json_parser.py +185 -0
  23. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  24. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +161 -0
  25. letta/server/rest_api/routers/v1/agents.py +22 -32
  26. letta/server/server.py +12 -12
  27. letta/services/job_manager.py +7 -12
  28. letta/services/tool_manager.py +17 -1
  29. letta/system.py +20 -0
  30. letta/utils.py +24 -1
  31. {letta_nightly-0.6.16.dev20250129104019.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/METADATA +4 -4
  32. {letta_nightly-0.6.16.dev20250129104019.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/RECORD +35 -31
  33. {letta_nightly-0.6.16.dev20250129104019.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/LICENSE +0 -0
  34. {letta_nightly-0.6.16.dev20250129104019.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/WHEEL +0 -0
  35. {letta_nightly-0.6.16.dev20250129104019.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/chat_completions_interface.py
@@ -0,0 +1,256 @@
+ import asyncio
+ from collections import deque
+ from datetime import datetime
+ from typing import AsyncGenerator, Optional, Union
+
+ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta
+
+ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+ from letta.log import get_logger
+ from letta.schemas.enums import MessageStreamStatus
+ from letta.schemas.letta_message import LettaMessage
+ from letta.schemas.message import Message
+ from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
+ from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
+ from letta.streaming_interface import AgentChunkStreamingInterface
+
+ logger = get_logger(__name__)
+
+
+ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
+     """
+     Provides an asynchronous streaming mechanism for LLM output. Internally
+     maintains a queue of chunks that can be consumed via an async generator.
+
+     Key Behaviors:
+     - process_chunk: Accepts ChatCompletionChunkResponse objects (e.g. from an
+       OpenAI-like streaming API), potentially transforms them to a partial
+       text response, and enqueues them.
+     - get_generator: Returns an async generator that yields messages or status
+       markers as they become available.
+     - step_complete, step_yield: End streaming for the current step or entirely,
+       depending on the multi_step setting.
+     - function_message, internal_monologue: Handle LLM “function calls” and
+       “reasoning” messages for non-streaming contexts.
+     """
+
+     FINISH_REASON_STR = "stop"
+     ASSISTANT_STR = "assistant"
+
+     def __init__(
+         self,
+         multi_step: bool = True,
+         timeout: int = 150,
+         # The following are placeholders for potential expansions; they
+         # remain if you need to differentiate between actual "assistant messages"
+         # vs. tool calls. By default, they are set for the "send_message" tool usage.
+         assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
+         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+         inner_thoughts_in_kwargs: bool = True,
+         inner_thoughts_kwarg: str = INNER_THOUGHTS_KWARG,
+     ):
+         self.streaming_mode = True
+
+         # Parsing state for incremental function-call data
+         self.current_function_name = ""
+         self.current_function_arguments = []
+
+         # Internal chunk buffer and event for async notification
+         self._chunks = deque()
+         self._event = asyncio.Event()
+         self._active = True
+
+         # Whether or not the stream should remain open across multiple steps
+         self.multi_step = multi_step
+
+         # Timing / debug parameters
+         self.timeout = timeout
+
+         # These are placeholders to handle specialized
+         # assistant message logic or storing inner thoughts.
+         self.assistant_message_tool_name = assistant_message_tool_name
+         self.assistant_message_tool_kwarg = assistant_message_tool_kwarg
+         self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs
+         self.inner_thoughts_kwarg = inner_thoughts_kwarg
+
+     async def _create_generator(
+         self,
+     ) -> AsyncGenerator[Union[LettaMessage, MessageStreamStatus], None]:
+         """
+         An asynchronous generator that yields queued items as they arrive.
+         Ends when _active is set to False or when timing out.
+         """
+         while self._active:
+             try:
+                 await asyncio.wait_for(self._event.wait(), timeout=self.timeout)
+             except asyncio.TimeoutError:
+                 break
+
+             while self._chunks:
+                 yield self._chunks.popleft()
+
+             self._event.clear()
+
+     def get_generator(self) -> AsyncGenerator:
+         """
+         Provide the async generator interface. Will raise StopIteration
+         if the stream is inactive.
+         """
+         if not self._active:
+             raise StopIteration("The stream is not active.")
+         return self._create_generator()
+
+     def _push_to_buffer(
+         self,
+         item: ChatCompletionChunk,
+     ):
+         """
+         Add an item (a LettaMessage, status marker, or partial chunk)
+         to the queue and signal waiting consumers.
+         """
+         if not self._active:
+             raise RuntimeError("Attempted to push to an inactive stream.")
+         self._chunks.append(item)
+         self._event.set()
+
+     def stream_start(self) -> None:
+         """Initialize or reset the streaming state for a new request."""
+         self._active = True
+         self._chunks.clear()
+         self._event.clear()
+         self._reset_parsing_state()
+
+     def stream_end(self) -> None:
+         """
+         Clean up after the current streaming session. Typically called when the
+         request is done or the data source has signaled it has no more data.
+         """
+         self._reset_parsing_state()
+
+     def step_complete(self) -> None:
+         """
+         Indicate that one step of multi-step generation is done.
+         If multi_step=False, the stream is closed immediately.
+         """
+         if not self.multi_step:
+             self._active = False
+             self._event.set()  # Ensure waiting generators can finalize
+         self._reset_parsing_state()
+
+     def step_yield(self) -> None:
+         """
+         Explicitly end the stream in a multi-step scenario, typically
+         called when the entire chain of steps is complete.
+         """
+         self._active = False
+         self._event.set()
+
+     @staticmethod
+     def clear() -> None:
+         """No-op retained for interface compatibility."""
+         return
+
+     def process_chunk(self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime) -> None:
+         """
+         Called externally with a ChatCompletionChunkResponse. Transforms
+         it if necessary, then enqueues partial messages for streaming back.
+         """
+         processed_chunk = self._process_chunk_to_openai_style(chunk)
+         if processed_chunk is not None:
+             self._push_to_buffer(processed_chunk)
+
+     def user_message(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+         """
+         Handle user messages. Here, it's a no-op, but included if your
+         pipeline needs to respond to user messages distinctly.
+         """
+         return
+
+     def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+         """
+         Handle LLM reasoning or internal monologue. Example usage: if you want
+         to capture chain-of-thought for debugging in a non-streaming scenario.
+         """
+         return
+
+     def assistant_message(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+         """
+         Handle direct assistant messages. This class primarily handles them
+         as function calls, so it's a no-op by default.
+         """
+         return
+
+     def function_message(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+         """
+         Handle function-related log messages. It's a no-op by default.
+         """
+         return
+
+     def _process_chunk_to_openai_style(self, chunk: ChatCompletionChunkResponse) -> Optional[ChatCompletionChunk]:
+         """
+         Optionally transform an inbound OpenAI-style chunk so that partial
+         content (especially from a 'send_message' tool) is exposed as text
+         deltas in 'content'. Otherwise, pass through or yield finish reasons.
+         """
+         choice = chunk.choices[0]
+         delta = choice.delta
+
+         # If there's direct content, we usually let it stream as-is
+         if delta.content is not None:
+             # TODO: Eventually use all of the native OpenAI objects
+             return ChatCompletionChunk(**chunk.model_dump(exclude_none=True))
+
+         # If there's a function call, accumulate its name/args. If it's a known
+         # text-producing function (like send_message), stream partial text.
+         if delta.tool_calls:
+             tool_call = delta.tool_calls[0]
+             if tool_call.function.name:
+                 self.current_function_name += tool_call.function.name
+             if tool_call.function.arguments:
+                 self.current_function_arguments.append(tool_call.function.arguments)
+
+             # Only parse arguments for "send_message" to stream partial text
+             if self.current_function_name.strip() == self.assistant_message_tool_name:
+                 combined_args = "".join(self.current_function_arguments)
+                 parsed_args = OptimisticJSONParser().parse(combined_args)
+
+                 # If we can see a "message" field, return it as partial content
+                 if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
+                     return ChatCompletionChunk(
+                         id=chunk.id,
+                         object=chunk.object,
+                         created=chunk.created.timestamp(),
+                         model=chunk.model,
+                         choices=[
+                             Choice(
+                                 index=choice.index,
+                                 delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
+                                 finish_reason=None,
+                             )
+                         ],
+                     )
+
+         # If there's a finish reason, pass that along
+         if choice.finish_reason is not None:
+             return ChatCompletionChunk(
+                 id=chunk.id,
+                 object=chunk.object,
+                 created=chunk.created.timestamp(),
+                 model=chunk.model,
+                 choices=[
+                     Choice(
+                         index=choice.index,
+                         delta=ChoiceDelta(),
+                         finish_reason=self.FINISH_REASON_STR,
+                     )
+                 ],
+             )
+
+         return None
+
+     def _reset_parsing_state(self) -> None:
+         """Clears internal buffers for function call name/args."""
+         self.current_function_name = ""
+         self.current_function_arguments = []
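
The class above boils down to a small producer/consumer pattern: `_push_to_buffer` appends to a `deque` and sets an `asyncio.Event`, while `_create_generator` waits on that event, drains the queue, clears the event, and exits on timeout or deactivation. Here is a minimal, self-contained sketch of those mechanics, with plain strings standing in for chunks and a short demo timeout; it illustrates the pattern only and is not letta's API.

```python
import asyncio
from collections import deque


async def drain(chunks: deque, event: asyncio.Event, timeout: float = 0.5):
    # Mirrors _create_generator: block until signaled, drain the queue,
    # clear the event, repeat; a timeout ends the stream.
    while True:
        try:
            await asyncio.wait_for(event.wait(), timeout=timeout)
        except asyncio.TimeoutError:
            break
        while chunks:
            yield chunks.popleft()
        event.clear()


async def main():
    chunks, event = deque(), asyncio.Event()

    async def produce():
        for part in ["Hel", "lo, ", "world!"]:
            chunks.append(part)  # mirrors _push_to_buffer: enqueue, then signal
            event.set()
            await asyncio.sleep(0.05)

    producer = asyncio.create_task(produce())
    async for piece in drain(chunks, event):
        print(piece, end="", flush=True)
    await producer
    print()


asyncio.run(main())
```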
letta/server/rest_api/optimistic_json_parser.py
@@ -0,0 +1,185 @@
+ import json
+
+
+ class OptimisticJSONParser:
+     """
+     A JSON parser that attempts to parse a given string using `json.loads`,
+     and if that fails, it parses as much valid JSON as possible while
+     allowing extra tokens to remain. Those extra tokens can be retrieved
+     from `self.last_parse_reminding`. If `strict` is False, the parser
+     tries to tolerate incomplete strings and incomplete numbers.
+     """
+
+     def __init__(self, strict=True):
+         self.strict = strict
+         self.parsers = {
+             " ": self.parse_space,
+             "\r": self.parse_space,
+             "\n": self.parse_space,
+             "\t": self.parse_space,
+             "[": self.parse_array,
+             "{": self.parse_object,
+             '"': self.parse_string,
+             "t": self.parse_true,
+             "f": self.parse_false,
+             "n": self.parse_null,
+         }
+         # Register number parser for digits and signs
+         for char in "0123456789.-":
+             self.parsers[char] = self.parse_number
+
+         self.last_parse_reminding = None
+         self.on_extra_token = self.default_on_extra_token
+
+     def default_on_extra_token(self, text, data, reminding):
+         pass
+
+     def parse(self, input_str):
+         """
+         Try to parse the entire `input_str` as JSON. If parsing fails,
+         attempts a partial parse, storing leftover text in
+         `self.last_parse_reminding`. A callback (`on_extra_token`) is
+         triggered if extra tokens remain.
+         """
+         if len(input_str) >= 1:
+             try:
+                 return json.loads(input_str)
+             except json.JSONDecodeError as decode_error:
+                 data, reminding = self.parse_any(input_str, decode_error)
+                 self.last_parse_reminding = reminding
+                 if self.on_extra_token and reminding:
+                     self.on_extra_token(input_str, data, reminding)
+                 return data
+         else:
+             return json.loads("{}")
+
+     def parse_any(self, input_str, decode_error):
+         """Determine which parser to use based on the first character."""
+         if not input_str:
+             raise decode_error
+         parser = self.parsers.get(input_str[0])
+         if parser is None:
+             raise decode_error
+         return parser(input_str, decode_error)
+
+     def parse_space(self, input_str, decode_error):
+         """Strip leading whitespace and parse again."""
+         return self.parse_any(input_str.strip(), decode_error)
+
+     def parse_array(self, input_str, decode_error):
+         """Parse a JSON array, returning the list and remaining string."""
+         # Skip the '['
+         input_str = input_str[1:]
+         array_values = []
+         input_str = input_str.strip()
+         while input_str:
+             if input_str[0] == "]":
+                 # Skip the ']'
+                 input_str = input_str[1:]
+                 break
+             value, input_str = self.parse_any(input_str, decode_error)
+             array_values.append(value)
+             input_str = input_str.strip()
+             if input_str.startswith(","):
+                 # Skip the ','
+                 input_str = input_str[1:].strip()
+         return array_values, input_str
+
+     def parse_object(self, input_str, decode_error):
+         """Parse a JSON object, returning the dict and remaining string."""
+         # Skip the '{'
+         input_str = input_str[1:]
+         obj = {}
+         input_str = input_str.strip()
+         while input_str:
+             if input_str[0] == "}":
+                 # Skip the '}'
+                 input_str = input_str[1:]
+                 break
+             key, input_str = self.parse_any(input_str, decode_error)
+             input_str = input_str.strip()
+
+             if not input_str or input_str[0] == "}":
+                 obj[key] = None
+                 break
+             if input_str[0] != ":":
+                 raise decode_error
+
+             # Skip ':'
+             input_str = input_str[1:].strip()
+             if not input_str or input_str[0] in ",}":
+                 obj[key] = None
+                 if input_str.startswith(","):
+                     input_str = input_str[1:]
+                 break
+
+             value, input_str = self.parse_any(input_str, decode_error)
+             obj[key] = value
+             input_str = input_str.strip()
+             if input_str.startswith(","):
+                 # Skip the ','
+                 input_str = input_str[1:].strip()
+         return obj, input_str
+
+     def parse_string(self, input_str, decode_error):
+         """Parse a JSON string, respecting escaped quotes if present."""
+         end = input_str.find('"', 1)
+         while end != -1 and input_str[end - 1] == "\\":
+             end = input_str.find('"', end + 1)
+
+         if end == -1:
+             # Incomplete string
+             if not self.strict:
+                 return input_str[1:], ""
+             return json.loads(f'"{input_str[1:]}"'), ""
+
+         str_val = input_str[: end + 1]
+         input_str = input_str[end + 1 :]
+         if not self.strict:
+             return str_val[1:-1], input_str
+         return json.loads(str_val), input_str
+
+     def parse_number(self, input_str, decode_error):
+         """
+         Parse a number (int or float). Allows digits, '.', '-', but
+         doesn't fully validate complex exponents unless they appear
+         before a non-number character.
+         """
+         idx = 0
+         while idx < len(input_str) and input_str[idx] in "0123456789.-":
+             idx += 1
+
+         num_str = input_str[:idx]
+         remainder = input_str[idx:]
+
+         # If it's only a sign or just '.', return as-is with empty remainder
+         if not num_str or num_str in {"-", "."}:
+             return num_str, ""
+
+         try:
+             if num_str.endswith("."):
+                 num = int(num_str[:-1])
+             else:
+                 num = float(num_str) if any(c in num_str for c in ".eE") else int(num_str)
+         except ValueError:
+             raise decode_error
+
+         return num, remainder
+
+     def parse_true(self, input_str, decode_error):
+         """Parse a 'true' value."""
+         if input_str.startswith(("t", "T")):
+             return True, input_str[4:]
+         raise decode_error
+
+     def parse_false(self, input_str, decode_error):
+         """Parse a 'false' value."""
+         if input_str.startswith(("f", "F")):
+             return False, input_str[5:]
+         raise decode_error
+
+     def parse_null(self, input_str, decode_error):
+         """Parse a 'null' value."""
+         if input_str.startswith("n"):
+             return None, input_str[4:]
+         raise decode_error
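
To see the parser's contract in action, feed it the kind of truncated tool-call payload that `ChatCompletionsStreamingInterface` accumulates mid-stream. A short usage sketch, assuming the letta package is importable; the input strings are made up for illustration:

```python
from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser

parser = OptimisticJSONParser(strict=False)

# A truncated payload, as it might arrive mid-stream: the incomplete
# string value is still surfaced so partial text can be streamed onward.
print(parser.parse('{"message": "Hello, wor'))  # -> {'message': 'Hello, wor'}

# Complete JSON falls through to plain json.loads.
print(parser.parse('{"message": "Hello, world"}'))  # -> {'message': 'Hello, world'}
```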
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
@@ -0,0 +1,161 @@
+ import asyncio
+ from typing import TYPE_CHECKING, Iterable, List, Optional, Union, cast
+
+ from fastapi import APIRouter, Body, Depends, Header, HTTPException
+ from fastapi.responses import StreamingResponse
+ from openai.types.chat import ChatCompletionMessageParam
+ from openai.types.chat.completion_create_params import CompletionCreateParams
+
+ from letta.agent import Agent
+ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+ from letta.log import get_logger
+ from letta.schemas.message import MessageCreate
+ from letta.schemas.openai.chat_completion_response import Message
+ from letta.schemas.user import User
+ from letta.server.rest_api.chat_completions_interface import ChatCompletionsStreamingInterface
+
+ # TODO this belongs in a controller!
+ from letta.server.rest_api.utils import get_letta_server, sse_async_generator
+
+ if TYPE_CHECKING:
+     from letta.server.server import SyncServer
+
+ router = APIRouter(prefix="/v1", tags=["chat_completions"])
+
+ logger = get_logger(__name__)
+
+
+ @router.post(
+     "/chat/completions",
+     response_model=None,
+     operation_id="create_chat_completions",
+     responses={
+         200: {
+             "description": "Successful response",
+             "content": {
+                 "text/event-stream": {"description": "Server-Sent Events stream"},
+             },
+         }
+     },
+ )
+ async def create_chat_completions(
+     completion_request: CompletionCreateParams = Body(...),
+     server: "SyncServer" = Depends(get_letta_server),
+     user_id: Optional[str] = Header(None, alias="user_id"),
+ ):
+     # Validate and process fields
+     try:
+         messages = list(cast(Iterable[ChatCompletionMessageParam], completion_request["messages"]))
+     except KeyError:
+         # Handle the case where "messages" is not present in the request
+         raise HTTPException(status_code=400, detail="The 'messages' field is missing in the request.")
+     except TypeError:
+         # Handle the case where "messages" is not iterable
+         raise HTTPException(status_code=400, detail="The 'messages' field must be an iterable.")
+     except Exception as e:
+         # Catch any other unexpected errors and include the exception message
+         raise HTTPException(status_code=400, detail=f"An error occurred while processing 'messages': {str(e)}")
+
+     if messages[-1]["role"] != "user":
+         logger.error(f"The last message does not have a `user` role: {messages}")
+         raise HTTPException(status_code=400, detail="'messages[-1].role' must be a 'user'")
+
+     input_message = messages[-1]
+     if not isinstance(input_message["content"], str):
+         logger.error(f"The input message does not have valid content: {input_message}")
+         raise HTTPException(status_code=400, detail="'messages[-1].content' must be a 'string'")
+
+     # Process remaining fields
+     if not completion_request["stream"]:
+         raise HTTPException(status_code=400, detail="Must be streaming request: `stream` was set to `False` in the request.")
+
+     actor = server.user_manager.get_user_or_default(user_id=user_id)
+
+     agent_id = str(completion_request.get("user", None))
+     if agent_id is None:
+         error_msg = "Must pass agent_id in the 'user' field"
+         logger.error(error_msg)
+         raise HTTPException(status_code=400, detail=error_msg)
+
+     letta_agent = server.load_agent(agent_id=agent_id, actor=actor)
+     llm_config = letta_agent.agent_state.llm_config
+     if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
+         error_msg = f"You can only use models with type 'openai' for chat completions. This agent {agent_id} has llm_config: \n{llm_config.model_dump_json(indent=4)}"
+         logger.error(error_msg)
+         raise HTTPException(status_code=400, detail=error_msg)
+
+     model = completion_request.get("model")
+     if model != llm_config.model:
+         warning_msg = f"The requested model {model} is different from the model specified in this agent's ({agent_id}) llm_config: \n{llm_config.model_dump_json(indent=4)}"
+         logger.warning(f"Defaulting to {llm_config.model}...")
+         logger.warning(warning_msg)
+
+     logger.info(f"Received input message: {input_message}")
+
+     return await send_message_to_agent_chat_completions(
+         server=server,
+         letta_agent=letta_agent,
+         actor=actor,
+         messages=[MessageCreate(role=input_message["role"], content=input_message["content"])],
+     )
+
+
+ async def send_message_to_agent_chat_completions(
+     server: "SyncServer",
+     letta_agent: Agent,
+     actor: User,
+     messages: Union[List[Message], List[MessageCreate]],
+     assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
+     assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+ ) -> StreamingResponse:
+     """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
+     # For streaming response
+     try:
+         # TODO: cleanup this logic
+         llm_config = letta_agent.agent_state.llm_config
+
+         # Create a new interface per request
+         letta_agent.interface = ChatCompletionsStreamingInterface()
+         streaming_interface = letta_agent.interface
+         if not isinstance(streaming_interface, ChatCompletionsStreamingInterface):
+             raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
+
+         # Allow AssistantMessage if desired by client
+         streaming_interface.assistant_message_tool_name = assistant_message_tool_name
+         streaming_interface.assistant_message_tool_kwarg = assistant_message_tool_kwarg
+
+         # Related to JSON buffer reader
+         streaming_interface.inner_thoughts_in_kwargs = (
+             llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
+         )
+
+         # Offload the synchronous message_func to a separate thread
+         streaming_interface.stream_start()
+         asyncio.create_task(
+             asyncio.to_thread(
+                 server.send_messages,
+                 actor=actor,
+                 agent_id=letta_agent.agent_state.id,
+                 messages=messages,
+                 interface=streaming_interface,
+             )
+         )
+
+         # Return a stream
+         return StreamingResponse(
+             sse_async_generator(
+                 streaming_interface.get_generator(),
+                 usage_task=None,
+                 finish_message=True,
+             ),
+             media_type="text/event-stream",
+         )
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         print(e)
+         import traceback
+
+         traceback.print_exc()
+         raise HTTPException(status_code=500, detail=f"{e}")
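
Since the route mirrors OpenAI's /v1/chat/completions, the stock OpenAI SDK can consume it. A hedged client-side sketch: the base URL, port, and agent ID below are placeholder assumptions, while passing the agent ID in the `user` field, ending with a `user`-role message, and setting `stream=True` are requirements taken from the handler above.

```python
from openai import OpenAI

# Base URL/port are assumptions for a locally running Letta server.
client = OpenAI(base_url="http://localhost:8283/v1", api_key="unused")

stream = client.chat.completions.create(
    model="gpt-4o-mini",  # ideally matches the agent's llm_config.model
    messages=[{"role": "user", "content": "Hello!"}],  # last message must have role "user"
    user="agent-00000000-0000-0000-0000-000000000000",  # hypothetical agent ID
    stream=True,  # non-streaming requests are rejected with a 400
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```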