letta-nightly 0.8.4.dev20250614104137__py3-none-any.whl → 0.8.4.dev20250615221417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. letta/__init__.py +1 -0
  2. letta/agents/base_agent.py +12 -1
  3. letta/agents/helpers.py +5 -2
  4. letta/agents/letta_agent.py +98 -61
  5. letta/agents/voice_sleeptime_agent.py +2 -1
  6. letta/constants.py +3 -5
  7. letta/data_sources/redis_client.py +30 -10
  8. letta/functions/function_sets/files.py +4 -4
  9. letta/functions/helpers.py +6 -1
  10. letta/functions/mcp_client/types.py +95 -0
  11. letta/groups/sleeptime_multi_agent_v2.py +2 -1
  12. letta/helpers/decorators.py +91 -0
  13. letta/interfaces/anthropic_streaming_interface.py +11 -0
  14. letta/interfaces/openai_streaming_interface.py +244 -225
  15. letta/llm_api/openai_client.py +1 -1
  16. letta/local_llm/utils.py +5 -1
  17. letta/orm/enums.py +1 -0
  18. letta/orm/mcp_server.py +3 -0
  19. letta/orm/tool.py +3 -0
  20. letta/otel/metric_registry.py +12 -0
  21. letta/otel/metrics.py +16 -7
  22. letta/schemas/letta_response.py +6 -1
  23. letta/schemas/letta_stop_reason.py +22 -0
  24. letta/schemas/mcp.py +48 -6
  25. letta/schemas/openai/chat_completion_request.py +1 -1
  26. letta/schemas/openai/chat_completion_response.py +1 -1
  27. letta/schemas/pip_requirement.py +14 -0
  28. letta/schemas/sandbox_config.py +1 -19
  29. letta/schemas/tool.py +5 -0
  30. letta/server/rest_api/json_parser.py +39 -3
  31. letta/server/rest_api/routers/v1/tools.py +3 -1
  32. letta/server/rest_api/routers/v1/voice.py +2 -3
  33. letta/server/rest_api/utils.py +1 -1
  34. letta/server/server.py +11 -2
  35. letta/services/agent_manager.py +37 -29
  36. letta/services/helpers/tool_execution_helper.py +39 -9
  37. letta/services/mcp/base_client.py +13 -2
  38. letta/services/mcp/sse_client.py +8 -1
  39. letta/services/mcp/streamable_http_client.py +56 -0
  40. letta/services/mcp_manager.py +23 -9
  41. letta/services/message_manager.py +30 -3
  42. letta/services/tool_executor/files_tool_executor.py +2 -3
  43. letta/services/tool_sandbox/e2b_sandbox.py +53 -3
  44. letta/services/tool_sandbox/local_sandbox.py +3 -1
  45. letta/services/user_manager.py +22 -0
  46. letta/settings.py +3 -0
  47. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/METADATA +5 -6
  48. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/RECORD +51 -48
  49. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/LICENSE +0 -0
  50. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/WHEEL +0 -0
  51. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/entry_points.txt +0 -0
@@ -6,13 +6,19 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
6
6
 
7
7
  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
8
8
  from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
9
+ from letta.log import get_logger
10
+ from letta.otel.context import get_ctx_attributes
11
+ from letta.otel.metric_registry import MetricRegistry
9
12
  from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
10
13
  from letta.schemas.letta_message_content import TextContent
14
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
11
15
  from letta.schemas.message import Message
12
16
  from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
13
17
  from letta.server.rest_api.json_parser import OptimisticJSONParser
14
18
  from letta.streaming_utils import JSONInnerThoughtsExtractor
15
19
 
20
+ logger = get_logger(__name__)
21
+
16
22
 
17
23
  class OpenAIStreamingInterface:
18
24
  """
@@ -60,6 +66,8 @@ class OpenAIStreamingInterface:
60
66
  def get_tool_call_object(self) -> ToolCall:
61
67
  """Useful for agent loop"""
62
68
  function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
69
+ if not function_name:
70
+ raise ValueError("No tool call ID available")
63
71
  tool_call_id = self.last_flushed_function_id if self.last_flushed_function_id else self.function_id_buffer
64
72
  if not tool_call_id:
65
73
  raise ValueError("No tool call ID available")
@@ -79,254 +87,265 @@ class OpenAIStreamingInterface:
79
87
  It also collects tokens and detects if a tool call is triggered.
80
88
  """
81
89
  first_chunk = True
90
+ try:
91
+ async with stream:
92
+ prev_message_type = None
93
+ message_index = 0
94
+ async for chunk in stream:
95
+ if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
96
+ now = get_utc_timestamp_ns()
97
+ ttft_ns = now - provider_request_start_timestamp_ns
98
+ ttft_span.add_event(
99
+ name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
100
+ )
101
+ metric_attributes = get_ctx_attributes()
102
+ metric_attributes["model.name"] = chunk.model
103
+ MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
104
+
105
+ first_chunk = False
106
+
107
+ if not self.model or not self.message_id:
108
+ self.model = chunk.model
109
+ self.message_id = chunk.id
110
+
111
+ # track usage
112
+ if chunk.usage:
113
+ self.input_tokens += chunk.usage.prompt_tokens
114
+ self.output_tokens += chunk.usage.completion_tokens
115
+
116
+ if chunk.choices:
117
+ choice = chunk.choices[0]
118
+ message_delta = choice.delta
119
+
120
+ if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
121
+ tool_call = message_delta.tool_calls[0]
122
+
123
+ if tool_call.function.name:
124
+ # If we're waiting for the first key, then we should hold back the name
125
+ # ie add it to a buffer instead of returning it as a chunk
126
+ if self.function_name_buffer is None:
127
+ self.function_name_buffer = tool_call.function.name
128
+ else:
129
+ self.function_name_buffer += tool_call.function.name
82
130
 
83
- async with stream:
84
- prev_message_type = None
85
- message_index = 0
86
- async for chunk in stream:
87
- if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
88
- now = get_utc_timestamp_ns()
89
- ttft_ns = now - provider_request_start_timestamp_ns
90
- ttft_span.add_event(
91
- name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
92
- )
93
- first_chunk = False
94
-
95
- if not self.model or not self.message_id:
96
- self.model = chunk.model
97
- self.message_id = chunk.id
98
-
99
- # track usage
100
- if chunk.usage:
101
- self.input_tokens += chunk.usage.prompt_tokens
102
- self.output_tokens += chunk.usage.completion_tokens
103
-
104
- if chunk.choices:
105
- choice = chunk.choices[0]
106
- message_delta = choice.delta
107
-
108
- if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
109
- tool_call = message_delta.tool_calls[0]
110
-
111
- if tool_call.function.name:
112
- # If we're waiting for the first key, then we should hold back the name
113
- # ie add it to a buffer instead of returning it as a chunk
114
- if self.function_name_buffer is None:
115
- self.function_name_buffer = tool_call.function.name
116
- else:
117
- self.function_name_buffer += tool_call.function.name
118
-
119
- if tool_call.id:
120
- # Buffer until next time
121
- if self.function_id_buffer is None:
122
- self.function_id_buffer = tool_call.id
123
- else:
124
- self.function_id_buffer += tool_call.id
125
-
126
- if tool_call.function.arguments:
127
- # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
128
- self.current_function_arguments += tool_call.function.arguments
129
- updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
130
- tool_call.function.arguments
131
- )
132
-
133
- # If we have inner thoughts, we should output them as a chunk
134
- if updates_inner_thoughts:
135
- if prev_message_type and prev_message_type != "reasoning_message":
136
- message_index += 1
137
- self.reasoning_messages.append(updates_inner_thoughts)
138
- reasoning_message = ReasoningMessage(
139
- id=self.letta_message_id,
140
- date=datetime.now(timezone.utc),
141
- reasoning=updates_inner_thoughts,
142
- # name=name,
143
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
144
- )
145
- prev_message_type = reasoning_message.message_type
146
- yield reasoning_message
147
-
148
- # Additionally inner thoughts may stream back with a chunk of main JSON
149
- # In that case, since we can only return a chunk at a time, we should buffer it
150
- if updates_main_json:
151
- if self.function_args_buffer is None:
152
- self.function_args_buffer = updates_main_json
153
- else:
154
- self.function_args_buffer += updates_main_json
131
+ if tool_call.id:
132
+ # Buffer until next time
133
+ if self.function_id_buffer is None:
134
+ self.function_id_buffer = tool_call.id
135
+ else:
136
+ self.function_id_buffer += tool_call.id
155
137
 
156
- # If we have main_json, we should output a ToolCallMessage
157
- elif updates_main_json:
138
+ if tool_call.function.arguments:
139
+ # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
140
+ self.current_function_arguments += tool_call.function.arguments
141
+ updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
142
+ tool_call.function.arguments
143
+ )
158
144
 
159
- # If there's something in the function_name buffer, we should release it first
160
- # NOTE: we could output it as part of a chunk that has both name and args,
161
- # however the frontend may expect name first, then args, so to be
162
- # safe we'll output name first in a separate chunk
163
- if self.function_name_buffer:
145
+ # If we have inner thoughts, we should output them as a chunk
146
+ if updates_inner_thoughts:
147
+ if prev_message_type and prev_message_type != "reasoning_message":
148
+ message_index += 1
149
+ self.reasoning_messages.append(updates_inner_thoughts)
150
+ reasoning_message = ReasoningMessage(
151
+ id=self.letta_message_id,
152
+ date=datetime.now(timezone.utc),
153
+ reasoning=updates_inner_thoughts,
154
+ # name=name,
155
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
156
+ )
157
+ prev_message_type = reasoning_message.message_type
158
+ yield reasoning_message
159
+
160
+ # Additionally inner thoughts may stream back with a chunk of main JSON
161
+ # In that case, since we can only return a chunk at a time, we should buffer it
162
+ if updates_main_json:
163
+ if self.function_args_buffer is None:
164
+ self.function_args_buffer = updates_main_json
165
+ else:
166
+ self.function_args_buffer += updates_main_json
164
167
 
165
- # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
166
- if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
168
+ # If we have main_json, we should output a ToolCallMessage
169
+ elif updates_main_json:
167
170
 
168
- # Store the ID of the tool call so allow skipping the corresponding response
169
- if self.function_id_buffer:
170
- self.prev_assistant_message_id = self.function_id_buffer
171
+ # If there's something in the function_name buffer, we should release it first
172
+ # NOTE: we could output it as part of a chunk that has both name and args,
173
+ # however the frontend may expect name first, then args, so to be
174
+ # safe we'll output name first in a separate chunk
175
+ if self.function_name_buffer:
171
176
 
172
- else:
173
- if prev_message_type and prev_message_type != "tool_call_message":
174
- message_index += 1
175
- self.tool_call_name = str(self.function_name_buffer)
176
- tool_call_msg = ToolCallMessage(
177
- id=self.letta_message_id,
178
- date=datetime.now(timezone.utc),
179
- tool_call=ToolCallDelta(
180
- name=self.function_name_buffer,
181
- arguments=None,
182
- tool_call_id=self.function_id_buffer,
183
- ),
184
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
185
- )
186
- prev_message_type = tool_call_msg.message_type
187
- yield tool_call_msg
188
-
189
- # Record what the last function name we flushed was
190
- self.last_flushed_function_name = self.function_name_buffer
191
- if self.last_flushed_function_id is None:
192
- self.last_flushed_function_id = self.function_id_buffer
193
- # Clear the buffer
194
- self.function_name_buffer = None
195
- self.function_id_buffer = None
196
- # Since we're clearing the name buffer, we should store
197
- # any updates to the arguments inside a separate buffer
198
-
199
- # Add any main_json updates to the arguments buffer
200
- if self.function_args_buffer is None:
201
- self.function_args_buffer = updates_main_json
202
- else:
203
- self.function_args_buffer += updates_main_json
177
+ # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
178
+ if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
204
179
 
205
- # If there was nothing in the name buffer, we can proceed to
206
- # output the arguments chunk as a ToolCallMessage
207
- else:
208
-
209
- # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
210
- if self.use_assistant_message and (
211
- self.last_flushed_function_name is not None
212
- and self.last_flushed_function_name == self.assistant_message_tool_name
213
- ):
214
- # do an additional parse on the updates_main_json
215
- if self.function_args_buffer:
216
- updates_main_json = self.function_args_buffer + updates_main_json
217
- self.function_args_buffer = None
218
-
219
- # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
220
- match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
221
- if updates_main_json == match_str:
222
- updates_main_json = None
223
-
224
- else:
225
- # Some hardcoding to strip off the trailing "}"
226
- if updates_main_json in ["}", '"}']:
227
- updates_main_json = None
228
- if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
229
- updates_main_json = updates_main_json[:-1]
230
-
231
- if not updates_main_json:
232
- # early exit to turn into content mode
233
- continue
234
-
235
- # There may be a buffer from a previous chunk, for example
236
- # if the previous chunk had arguments but we needed to flush name
237
- if self.function_args_buffer:
238
- # In this case, we should release the buffer + new data at once
239
- combined_chunk = self.function_args_buffer + updates_main_json
240
-
241
- if prev_message_type and prev_message_type != "assistant_message":
242
- message_index += 1
243
- assistant_message = AssistantMessage(
244
- id=self.letta_message_id,
245
- date=datetime.now(timezone.utc),
246
- content=combined_chunk,
247
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
248
- )
249
- prev_message_type = assistant_message.message_type
250
- yield assistant_message
251
180
  # Store the ID of the tool call so allow skipping the corresponding response
252
181
  if self.function_id_buffer:
253
182
  self.prev_assistant_message_id = self.function_id_buffer
254
- # clear buffer
255
- self.function_args_buffer = None
256
- self.function_id_buffer = None
257
183
 
258
184
  else:
259
- # If there's no buffer to clear, just output a new chunk with new data
260
- # TODO: THIS IS HORRIBLE
261
- # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
262
- # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
263
- parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
264
-
265
- if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
266
- self.assistant_message_tool_kwarg
267
- ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
268
- new_content = parsed_args.get(self.assistant_message_tool_kwarg)
269
- prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
270
- # TODO: Assumes consistent state and that prev_content is subset of new_content
271
- diff = new_content.replace(prev_content, "", 1)
272
- self.current_json_parse_result = parsed_args
273
- if prev_message_type and prev_message_type != "assistant_message":
274
- message_index += 1
275
- assistant_message = AssistantMessage(
276
- id=self.letta_message_id,
277
- date=datetime.now(timezone.utc),
278
- content=diff,
279
- # name=name,
280
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
281
- )
282
- prev_message_type = assistant_message.message_type
283
- yield assistant_message
284
-
285
- # Store the ID of the tool call so allow skipping the corresponding response
286
- if self.function_id_buffer:
287
- self.prev_assistant_message_id = self.function_id_buffer
288
- # clear buffers
289
- self.function_id_buffer = None
290
- else:
291
-
292
- # There may be a buffer from a previous chunk, for example
293
- # if the previous chunk had arguments but we needed to flush name
294
- if self.function_args_buffer:
295
- # In this case, we should release the buffer + new data at once
296
- combined_chunk = self.function_args_buffer + updates_main_json
297
185
  if prev_message_type and prev_message_type != "tool_call_message":
298
186
  message_index += 1
187
+ self.tool_call_name = str(self.function_name_buffer)
299
188
  tool_call_msg = ToolCallMessage(
300
189
  id=self.letta_message_id,
301
190
  date=datetime.now(timezone.utc),
302
191
  tool_call=ToolCallDelta(
303
192
  name=self.function_name_buffer,
304
- arguments=combined_chunk,
193
+ arguments=None,
305
194
  tool_call_id=self.function_id_buffer,
306
195
  ),
307
- # name=name,
308
196
  otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
309
197
  )
310
198
  prev_message_type = tool_call_msg.message_type
311
199
  yield tool_call_msg
312
- # clear buffer
313
- self.function_args_buffer = None
314
- self.function_id_buffer = None
200
+
201
+ # Record what the last function name we flushed was
202
+ self.last_flushed_function_name = self.function_name_buffer
203
+ if self.last_flushed_function_id is None:
204
+ self.last_flushed_function_id = self.function_id_buffer
205
+ # Clear the buffer
206
+ self.function_name_buffer = None
207
+ self.function_id_buffer = None
208
+ # Since we're clearing the name buffer, we should store
209
+ # any updates to the arguments inside a separate buffer
210
+
211
+ # Add any main_json updates to the arguments buffer
212
+ if self.function_args_buffer is None:
213
+ self.function_args_buffer = updates_main_json
315
214
  else:
316
- # If there's no buffer to clear, just output a new chunk with new data
317
- if prev_message_type and prev_message_type != "tool_call_message":
318
- message_index += 1
319
- tool_call_msg = ToolCallMessage(
320
- id=self.letta_message_id,
321
- date=datetime.now(timezone.utc),
322
- tool_call=ToolCallDelta(
323
- name=None,
324
- arguments=updates_main_json,
325
- tool_call_id=self.function_id_buffer,
326
- ),
327
- # name=name,
328
- otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
329
- )
330
- prev_message_type = tool_call_msg.message_type
331
- yield tool_call_msg
332
- self.function_id_buffer = None
215
+ self.function_args_buffer += updates_main_json
216
+
217
+ # If there was nothing in the name buffer, we can proceed to
218
+ # output the arguments chunk as a ToolCallMessage
219
+ else:
220
+
221
+ # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
222
+ if self.use_assistant_message and (
223
+ self.last_flushed_function_name is not None
224
+ and self.last_flushed_function_name == self.assistant_message_tool_name
225
+ ):
226
+ # do an additional parse on the updates_main_json
227
+ if self.function_args_buffer:
228
+ updates_main_json = self.function_args_buffer + updates_main_json
229
+ self.function_args_buffer = None
230
+
231
+ # Pretty gross hardcoding that assumes that if we're toggling into the keywords, we have the full prefix
232
+ match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
233
+ if updates_main_json == match_str:
234
+ updates_main_json = None
235
+
236
+ else:
237
+ # Some hardcoding to strip off the trailing "}"
238
+ if updates_main_json in ["}", '"}']:
239
+ updates_main_json = None
240
+ if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
241
+ updates_main_json = updates_main_json[:-1]
242
+
243
+ if not updates_main_json:
244
+ # early exit to turn into content mode
245
+ continue
246
+
247
+ # There may be a buffer from a previous chunk, for example
248
+ # if the previous chunk had arguments but we needed to flush name
249
+ if self.function_args_buffer:
250
+ # In this case, we should release the buffer + new data at once
251
+ combined_chunk = self.function_args_buffer + updates_main_json
252
+
253
+ if prev_message_type and prev_message_type != "assistant_message":
254
+ message_index += 1
255
+ assistant_message = AssistantMessage(
256
+ id=self.letta_message_id,
257
+ date=datetime.now(timezone.utc),
258
+ content=combined_chunk,
259
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
260
+ )
261
+ prev_message_type = assistant_message.message_type
262
+ yield assistant_message
263
+ # Store the ID of the tool call so allow skipping the corresponding response
264
+ if self.function_id_buffer:
265
+ self.prev_assistant_message_id = self.function_id_buffer
266
+ # clear buffer
267
+ self.function_args_buffer = None
268
+ self.function_id_buffer = None
269
+
270
+ else:
271
+ # If there's no buffer to clear, just output a new chunk with new data
272
+ # TODO: THIS IS HORRIBLE
273
+ # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
274
+ # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
275
+ parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
276
+
277
+ if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
278
+ self.assistant_message_tool_kwarg
279
+ ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
280
+ new_content = parsed_args.get(self.assistant_message_tool_kwarg)
281
+ prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
282
+ # TODO: Assumes consistent state and that prev_content is subset of new_content
283
+ diff = new_content.replace(prev_content, "", 1)
284
+ self.current_json_parse_result = parsed_args
285
+ if prev_message_type and prev_message_type != "assistant_message":
286
+ message_index += 1
287
+ assistant_message = AssistantMessage(
288
+ id=self.letta_message_id,
289
+ date=datetime.now(timezone.utc),
290
+ content=diff,
291
+ # name=name,
292
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
293
+ )
294
+ prev_message_type = assistant_message.message_type
295
+ yield assistant_message
296
+
297
+ # Store the ID of the tool call so allow skipping the corresponding response
298
+ if self.function_id_buffer:
299
+ self.prev_assistant_message_id = self.function_id_buffer
300
+ # clear buffers
301
+ self.function_id_buffer = None
302
+ else:
303
+
304
+ # There may be a buffer from a previous chunk, for example
305
+ # if the previous chunk had arguments but we needed to flush name
306
+ if self.function_args_buffer:
307
+ # In this case, we should release the buffer + new data at once
308
+ combined_chunk = self.function_args_buffer + updates_main_json
309
+ if prev_message_type and prev_message_type != "tool_call_message":
310
+ message_index += 1
311
+ tool_call_msg = ToolCallMessage(
312
+ id=self.letta_message_id,
313
+ date=datetime.now(timezone.utc),
314
+ tool_call=ToolCallDelta(
315
+ name=self.function_name_buffer,
316
+ arguments=combined_chunk,
317
+ tool_call_id=self.function_id_buffer,
318
+ ),
319
+ # name=name,
320
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
321
+ )
322
+ prev_message_type = tool_call_msg.message_type
323
+ yield tool_call_msg
324
+ # clear buffer
325
+ self.function_args_buffer = None
326
+ self.function_id_buffer = None
327
+ else:
328
+ # If there's no buffer to clear, just output a new chunk with new data
329
+ if prev_message_type and prev_message_type != "tool_call_message":
330
+ message_index += 1
331
+ tool_call_msg = ToolCallMessage(
332
+ id=self.letta_message_id,
333
+ date=datetime.now(timezone.utc),
334
+ tool_call=ToolCallDelta(
335
+ name=None,
336
+ arguments=updates_main_json,
337
+ tool_call_id=self.function_id_buffer,
338
+ ),
339
+ # name=name,
340
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
341
+ )
342
+ prev_message_type = tool_call_msg.message_type
343
+ yield tool_call_msg
344
+ self.function_id_buffer = None
345
+ except Exception as e:
346
+ logger.error("Error processing stream: %s", e)
347
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
348
+ yield stop_reason
349
+ raise
350
+ finally:
351
+ logger.info("OpenAIStreamingInterface: Stream processing complete.")
@@ -286,7 +286,7 @@ class OpenAIClient(LLMClientBase):
286
286
 
287
287
  # If we used a reasoning model, create a content part for the ommitted reasoning
288
288
  if is_openai_reasoning_model(llm_config.model):
289
- chat_completion_response.choices[0].message.ommitted_reasoning_content = True
289
+ chat_completion_response.choices[0].message.omitted_reasoning_content = True
290
290
 
291
291
  return chat_completion_response
292
292
 
letta/local_llm/utils.py CHANGED
@@ -100,7 +100,11 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
100
100
  try:
101
101
  if field == "type":
102
102
  function_tokens += 2
103
- function_tokens += len(encoding.encode(v["type"]))
103
+ # Handle both string and array types, e.g. {"type": ["string", "null"]}
104
+ if isinstance(v["type"], list):
105
+ function_tokens += len(encoding.encode(",".join(v["type"])))
106
+ else:
107
+ function_tokens += len(encoding.encode(v["type"]))
104
108
  elif field == "description":
105
109
  function_tokens += 2
106
110
  function_tokens += len(encoding.encode(v["description"]))
letta/orm/enums.py CHANGED
@@ -38,3 +38,4 @@ class ActorType(str, Enum):
38
38
  class MCPServerType(str, Enum):
39
39
  SSE = "sse"
40
40
  STDIO = "stdio"
41
+ STREAMABLE_HTTP = "streamable_http"
letta/orm/mcp_server.py CHANGED
@@ -36,6 +36,9 @@ class MCPServer(SqlalchemyBase, OrganizationMixin):
36
36
  String, nullable=True, doc="The URL of the server (MCP SSE client will connect to this URL)"
37
37
  )
38
38
 
39
+ # access token / api key for MCP servers that require authentication
40
+ token: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The access token or api key for the MCP server")
41
+
39
42
  # stdio server
40
43
  stdio_config: Mapped[Optional[StdioServerConfig]] = mapped_column(
41
44
  MCPStdioServerConfigColumn, nullable=True, doc="The configuration for the stdio server"
letta/orm/tool.py CHANGED
@@ -44,6 +44,9 @@ class Tool(SqlalchemyBase, OrganizationMixin):
44
44
  source_code: Mapped[Optional[str]] = mapped_column(String, doc="The source code of the function.")
45
45
  json_schema: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="The OAI compatable JSON schema of the function.")
46
46
  args_json_schema: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="The JSON schema of the function arguments.")
47
+ pip_requirements: Mapped[Optional[List]] = mapped_column(
48
+ JSON, nullable=True, doc="Optional list of pip packages required by this tool."
49
+ )
47
50
  metadata_: Mapped[Optional[dict]] = mapped_column(JSON, default=lambda: {}, doc="A dictionary of additional metadata for the tool.")
48
51
  # relationships
49
52
  organization: Mapped["Organization"] = relationship("Organization", back_populates="tools", lazy="selectin")
@@ -95,6 +95,18 @@ class MetricRegistry:
95
95
  ),
96
96
  )
97
97
 
98
+ @property
99
+ def step_execution_time_ms_histogram(self) -> Histogram:
100
+ return self._get_or_create_metric(
101
+ "hist_step_execution_time_ms",
102
+ partial(
103
+ self._meter.create_histogram,
104
+ name="hist_step_execution_time_ms",
105
+ description="Histogram for step execution time (ms)",
106
+ unit="ms",
107
+ ),
108
+ )
109
+
98
110
  # TODO (cliandy): instrument this
99
111
  @property
100
112
  def message_cost(self) -> Histogram: