letta-nightly 0.6.27.dev20250220104103__py3-none-any.whl → 0.6.29.dev20250221033538__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +19 -2
- letta/client/client.py +2 -0
- letta/constants.py +2 -0
- letta/functions/schema_generator.py +6 -6
- letta/helpers/converters.py +153 -0
- letta/helpers/tool_rule_solver.py +11 -1
- letta/llm_api/anthropic.py +10 -5
- letta/llm_api/aws_bedrock.py +1 -1
- letta/llm_api/deepseek.py +303 -0
- letta/llm_api/helpers.py +20 -10
- letta/llm_api/llm_api_tools.py +85 -2
- letta/llm_api/openai.py +16 -1
- letta/local_llm/chat_completion_proxy.py +15 -2
- letta/local_llm/lmstudio/api.py +75 -1
- letta/orm/__init__.py +2 -0
- letta/orm/agent.py +11 -4
- letta/orm/custom_columns.py +31 -110
- letta/orm/identities_agents.py +13 -0
- letta/orm/identity.py +60 -0
- letta/orm/organization.py +2 -0
- letta/orm/sqlalchemy_base.py +4 -0
- letta/schemas/agent.py +11 -1
- letta/schemas/identity.py +67 -0
- letta/schemas/llm_config.py +2 -0
- letta/schemas/message.py +1 -1
- letta/schemas/openai/chat_completion_response.py +2 -0
- letta/schemas/providers.py +72 -1
- letta/schemas/tool_rule.py +9 -1
- letta/serialize_schemas/__init__.py +1 -0
- letta/serialize_schemas/agent.py +36 -0
- letta/serialize_schemas/base.py +12 -0
- letta/serialize_schemas/custom_fields.py +69 -0
- letta/serialize_schemas/message.py +15 -0
- letta/server/db.py +111 -0
- letta/server/rest_api/app.py +8 -0
- letta/server/rest_api/chat_completions_interface.py +45 -21
- letta/server/rest_api/interface.py +114 -9
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +98 -24
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +14 -3
- letta/server/rest_api/routers/v1/identities.py +121 -0
- letta/server/rest_api/utils.py +183 -4
- letta/server/server.py +23 -117
- letta/services/agent_manager.py +53 -6
- letta/services/block_manager.py +1 -1
- letta/services/identity_manager.py +156 -0
- letta/services/job_manager.py +1 -1
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +1 -1
- letta/services/step_manager.py +1 -1
- letta/services/tool_manager.py +1 -1
- letta/services/user_manager.py +1 -1
- letta/settings.py +3 -0
- letta/streaming_interface.py +6 -2
- letta/tracing.py +205 -0
- letta/utils.py +4 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/METADATA +9 -2
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/RECORD +66 -52
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/entry_points.txt +0 -0
letta/schemas/providers.py
CHANGED
@@ -211,6 +211,75 @@ class OpenAIProvider(Provider):
         return None
 
 
+class DeepSeekProvider(OpenAIProvider):
+    """
+    DeepSeek ChatCompletions API is similar to OpenAI's reasoning API,
+    but with slight differences:
+    * For example, DeepSeek's API requires perfect interleaving of user/assistant
+    * It also does not support native function calling
+    """
+
+    name: str = "deepseek"
+    base_url: str = Field("https://api.deepseek.com/v1", description="Base URL for the DeepSeek API.")
+    api_key: str = Field(..., description="API key for the DeepSeek API.")
+
+    def get_model_context_window_size(self, model_name: str) -> Optional[int]:
+        # DeepSeek doesn't return context window in the model listing,
+        # so these are hardcoded from their website
+        if model_name == "deepseek-reasoner":
+            return 64000
+        elif model_name == "deepseek-chat":
+            return 64000
+        else:
+            return None
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+
+        if "data" in response:
+            data = response["data"]
+        else:
+            data = response
+
+        configs = []
+        for model in data:
+            assert "id" in model, f"DeepSeek model missing 'id' field: {model}"
+            model_name = model["id"]
+
+            # In case DeepSeek starts supporting it in the future:
+            if "context_length" in model:
+                # Context length is returned in OpenRouter as "context_length"
+                context_window_size = model["context_length"]
+            else:
+                context_window_size = self.get_model_context_window_size(model_name)
+
+            if not context_window_size:
+                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                continue
+
+            # Not used for deepseek-reasoner, but otherwise is true
+            put_inner_thoughts_in_kwargs = False if model_name == "deepseek-reasoner" else True
+
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="deepseek",
+                    model_endpoint=self.base_url,
+                    context_window=context_window_size,
+                    handle=self.get_handle(model_name),
+                    put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+                )
+            )
+
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        # No embeddings supported
+        return []
+
+
 class LMStudioOpenAIProvider(OpenAIProvider):
     name: str = "lmstudio-openai"
     base_url: str = Field(..., description="Base URL for the LMStudio OpenAI API.")
@@ -945,4 +1014,6 @@ class AnthropicBedrockProvider(Provider):
         return bedrock_get_model_context_window(model_name)
 
     def get_handle(self, model_name: str) -> str:
-
+        print(model_name)
+        model = model_name.split(".")[-1]
+        return f"bedrock/{model}"
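The new provider is a drop-in OpenAIProvider subclass, so listing its models follows the same pattern as the other providers. A minimal usage sketch (hypothetical, not part of the release; it assumes a valid key in the DEEPSEEK_API_KEY environment variable and makes a live call to DeepSeek's model listing):

import os

from letta.schemas.providers import DeepSeekProvider

# Hypothetical illustration: enumerate the LLMConfig entries the provider would register.
provider = DeepSeekProvider(api_key=os.environ["DEEPSEEK_API_KEY"])
for cfg in provider.list_llm_models():
    # e.g. deepseek-chat / deepseek-reasoner, both with the hardcoded 64000-token window
    print(cfg.model, cfg.context_window, cfg.put_inner_thoughts_in_kwargs)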
letta/schemas/tool_rule.py
CHANGED
@@ -48,7 +48,15 @@ class TerminalToolRule(BaseToolRule):
     type: Literal[ToolRuleType.exit_loop] = ToolRuleType.exit_loop
 
 
+class ContinueToolRule(BaseToolRule):
+    """
+    Represents a tool rule configuration where if this tool gets called, it must continue the agent loop.
+    """
+
+    type: Literal[ToolRuleType.continue_loop] = ToolRuleType.continue_loop
+
+
 ToolRule = Annotated[
-    Union[ChildToolRule, InitToolRule, TerminalToolRule, ConditionalToolRule],
+    Union[ChildToolRule, InitToolRule, TerminalToolRule, ConditionalToolRule, ContinueToolRule],
     Field(discriminator="type"),
 ]
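Because ContinueToolRule joins the ToolRule discriminated union, it round-trips through pydantic like the existing rule types. A minimal sketch (assuming pydantic v2 and that BaseToolRule carries the usual tool_name field, which is not shown in this hunk):

from pydantic import TypeAdapter

from letta.schemas.tool_rule import ContinueToolRule, ToolRule

# Direct construction; `tool_name` is assumed to be inherited from BaseToolRule.
rule = ContinueToolRule(tool_name="send_message")

# Round-trip through the union: the `type` discriminator selects ContinueToolRule again.
restored = TypeAdapter(ToolRule).validate_python(rule.model_dump())
assert isinstance(restored, ContinueToolRule)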
letta/serialize_schemas/__init__.py
ADDED
@@ -0,0 +1 @@
+from letta.serialize_schemas.agent import SerializedAgentSchema
letta/serialize_schemas/agent.py
ADDED
@@ -0,0 +1,36 @@
+from marshmallow import fields
+
+from letta.orm import Agent
+from letta.serialize_schemas.base import BaseSchema
+from letta.serialize_schemas.custom_fields import EmbeddingConfigField, LLMConfigField, ToolRulesField
+from letta.serialize_schemas.message import SerializedMessageSchema
+
+
+class SerializedAgentSchema(BaseSchema):
+    """
+    Marshmallow schema for serializing/deserializing Agent objects.
+    Excludes relational fields.
+    """
+
+    llm_config = LLMConfigField()
+    embedding_config = EmbeddingConfigField()
+    tool_rules = ToolRulesField()
+
+    messages = fields.List(fields.Nested(SerializedMessageSchema))
+
+    def __init__(self, *args, session=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        if session:
+            self.session = session
+
+            # propagate session to nested schemas
+            for field_name, field_obj in self.fields.items():
+                if isinstance(field_obj, fields.List) and hasattr(field_obj.inner, "schema"):
+                    field_obj.inner.schema.session = session
+                elif hasattr(field_obj, "schema"):
+                    field_obj.schema.session = session
+
+    class Meta(BaseSchema.Meta):
+        model = Agent
+        # TODO: Serialize these as well...
+        exclude = ("tools", "sources", "core_memory", "tags", "source_passages", "agent_passages", "organization")
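A rough usage sketch of the new serialization schemas (hypothetical; assumes a SQLAlchemy session from the new letta/server/db.py module and at least one Agent row in the database):

from letta.orm import Agent
from letta.serialize_schemas import SerializedAgentSchema
from letta.server.db import db_context

with db_context() as session:
    agent_orm = session.query(Agent).first()
    schema = SerializedAgentSchema(session=session)

    # Dump to a plain dict; llm_config, embedding_config, tool_rules and messages
    # are handled by the custom fields defined in custom_fields.py below.
    agent_dict = schema.dump(agent_orm)

    # Load back into an ORM instance (marshmallow-sqlalchemy's load_instance=True).
    restored = schema.load(agent_dict, session=session)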
letta/serialize_schemas/base.py
ADDED
@@ -0,0 +1,12 @@
+from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
+
+
+class BaseSchema(SQLAlchemyAutoSchema):
+    """
+    Base schema for all SQLAlchemy models.
+    This ensures all schemas share the same session.
+    """
+
+    class Meta:
+        include_relationships = True
+        load_instance = True
letta/serialize_schemas/custom_fields.py
ADDED
@@ -0,0 +1,69 @@
+from marshmallow import fields
+
+from letta.helpers.converters import (
+    deserialize_embedding_config,
+    deserialize_llm_config,
+    deserialize_tool_calls,
+    deserialize_tool_rules,
+    serialize_embedding_config,
+    serialize_llm_config,
+    serialize_tool_calls,
+    serialize_tool_rules,
+)
+
+
+class PydanticField(fields.Field):
+    """Generic Marshmallow field for handling Pydantic models."""
+
+    def __init__(self, pydantic_class, **kwargs):
+        self.pydantic_class = pydantic_class
+        super().__init__(**kwargs)
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return value.model_dump() if value else None
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return self.pydantic_class(**value) if value else None
+
+
+class LLMConfigField(fields.Field):
+    """Marshmallow field for handling LLMConfig serialization."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_llm_config(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_llm_config(value)
+
+
+class EmbeddingConfigField(fields.Field):
+    """Marshmallow field for handling EmbeddingConfig serialization."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_embedding_config(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_embedding_config(value)
+
+
+class ToolRulesField(fields.List):
+    """Custom Marshmallow field to handle a list of ToolRules."""
+
+    def __init__(self, **kwargs):
+        super().__init__(fields.Dict(), **kwargs)
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_tool_rules(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_tool_rules(value)
+
+
+class ToolCallField(fields.Field):
+    """Marshmallow field for handling a list of OpenAI ToolCall objects."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_tool_calls(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_tool_calls(value)
letta/serialize_schemas/message.py
ADDED
@@ -0,0 +1,15 @@
+from letta.orm.message import Message
+from letta.serialize_schemas.base import BaseSchema
+from letta.serialize_schemas.custom_fields import ToolCallField
+
+
+class SerializedMessageSchema(BaseSchema):
+    """
+    Marshmallow schema for serializing/deserializing Message objects.
+    """
+
+    tool_calls = ToolCallField()
+
+    class Meta(BaseSchema.Meta):
+        model = Message
+        exclude = ("step", "job_message")
letta/server/db.py
ADDED
@@ -0,0 +1,111 @@
+import os
+from contextlib import contextmanager
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+from letta.config import LettaConfig
+from letta.log import get_logger
+from letta.orm import Base
+
+# NOTE: hack to see if single session management works
+from letta.settings import settings
+
+config = LettaConfig.load()
+
+logger = get_logger(__name__)
+
+
+def print_sqlite_schema_error():
+    """Print a formatted error message for SQLite schema issues"""
+    console = Console()
+    error_text = Text()
+    error_text.append("Existing SQLite DB schema is invalid, and schema migrations are not supported for SQLite. ", style="bold red")
+    error_text.append("To have migrations supported between Letta versions, please run Letta with Docker (", style="white")
+    error_text.append("https://docs.letta.com/server/docker", style="blue underline")
+    error_text.append(") or use Postgres by setting ", style="white")
+    error_text.append("LETTA_PG_URI", style="yellow")
+    error_text.append(".\n\n", style="white")
+    error_text.append("If you wish to keep using SQLite, you can reset your database by removing the DB file with ", style="white")
+    error_text.append("rm ~/.letta/sqlite.db", style="yellow")
+    error_text.append(" or downgrade to your previous version of Letta.", style="white")
+
+    console.print(Panel(error_text, border_style="red"))
+
+
+@contextmanager
+def db_error_handler():
+    """Context manager for handling database errors"""
+    try:
+        yield
+    except Exception as e:
+        # Handle other SQLAlchemy errors
+        print(e)
+        print_sqlite_schema_error()
+        # raise ValueError(f"SQLite DB error: {str(e)}")
+        exit(1)
+
+
+if settings.letta_pg_uri_no_default:
+    print("Creating postgres engine")
+    config.recall_storage_type = "postgres"
+    config.recall_storage_uri = settings.letta_pg_uri_no_default
+    config.archival_storage_type = "postgres"
+    config.archival_storage_uri = settings.letta_pg_uri_no_default
+
+    # create engine
+    engine = create_engine(
+        settings.letta_pg_uri,
+        pool_size=settings.pg_pool_size,
+        max_overflow=settings.pg_max_overflow,
+        pool_timeout=settings.pg_pool_timeout,
+        pool_recycle=settings.pg_pool_recycle,
+        echo=settings.pg_echo,
+    )
+else:
+    # TODO: don't rely on config storage
+    engine_path = "sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db")
+    logger.info("Creating sqlite engine " + engine_path)
+
+    engine = create_engine(engine_path)
+
+    # Store the original connect method
+    original_connect = engine.connect
+
+    def wrapped_connect(*args, **kwargs):
+        with db_error_handler():
+            # Get the connection
+            connection = original_connect(*args, **kwargs)
+
+            # Store the original execution method
+            original_execute = connection.execute
+
+            # Wrap the execute method of the connection
+            def wrapped_execute(*args, **kwargs):
+                with db_error_handler():
+                    return original_execute(*args, **kwargs)
+
+            # Replace the connection's execute method
+            connection.execute = wrapped_execute
+
+            return connection
+
+    # Replace the engine's connect method
+    engine.connect = wrapped_connect
+
+Base.metadata.create_all(bind=engine)
+
+
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+db_context = contextmanager(get_db)
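The module also exposes simple session helpers. A small sketch of how they might be used (hypothetical; assumes the default SQLite configuration so the module-level engine can be created on import):

from sqlalchemy import text

from letta.server.db import db_context

# db_context wraps get_db() with contextmanager: it yields a Session bound to the
# module-level engine and guarantees close() on exit.
with db_context() as session:
    print(session.execute(text("SELECT 1")).scalar())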
letta/server/rest_api/app.py
CHANGED
@@ -231,6 +231,14 @@ def create_application() -> "FastAPI":
         allow_headers=["*"],
     )
 
+    # Set up OpenTelemetry tracing
+    endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+    if endpoint:
+        print(f"▶ Using OTLP tracing with endpoint: {endpoint}")
+        from letta.tracing import setup_tracing
+
+        setup_tracing(endpoint=endpoint, service_name="memgpt-server")
+
     for route in v1_routes:
         app.include_router(route, prefix=API_PREFIX)
     # this gives undocumented routes for "latest" and bare api calls.
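The tracing hook only runs when OTEL_EXPORTER_OTLP_ENDPOINT is set before create_application() is called. A hedged sketch of exercising it directly (assumes an OTLP collector is actually listening at the given address, which is a placeholder):

import os

# Assumed collector address; the server reads this variable at application creation time.
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4317"

from letta.tracing import setup_tracing

setup_tracing(endpoint=os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"], service_name="memgpt-server")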
letta/server/rest_api/chat_completions_interface.py
CHANGED
@@ -56,6 +56,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         self.current_function_name = ""
         self.current_function_arguments = []
         self.current_json_parse_result = {}
+        self._found_message_tool_kwarg = False
 
         # Internal chunk buffer and event for async notification
         self._chunks = deque()
@@ -153,12 +154,13 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """No-op retained for interface compatibility."""
         return
 
-    def process_chunk(
+    def process_chunk(
+        self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime, expect_reasoning_content: bool = False
+    ) -> None:
         """
         Called externally with a ChatCompletionChunkResponse. Transforms
         it if necessary, then enqueues partial messages for streaming back.
         """
-        # print("RECEIVED CHUNK...")
        processed_chunk = self._process_chunk_to_openai_style(chunk)
        if processed_chunk is not None:
            self._push_to_buffer(processed_chunk)
@@ -197,6 +199,10 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         content (especially from a 'send_message' tool) is exposed as text
         deltas in 'content'. Otherwise, pass through or yield finish reasons.
         """
+        # If we've already sent the final chunk, ignore everything.
+        if self._found_message_tool_kwarg:
+            return None
+
         choice = chunk.choices[0]
         delta = choice.delta
 
@@ -219,25 +225,43 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
             combined_args = "".join(self.current_function_arguments)
             parsed_args = OptimisticJSONParser().parse(combined_args)
 
-            #
-            # This is
-            if parsed_args
-                self.
(the remaining removed lines, 226-240 on the old side, are truncated in this diff view)
+            # TODO: Make this less brittle! This depends on `message` coming first!
+            # This is a heuristic we use to know if we're done with the `message` part of `send_message`
+            if len(parsed_args.keys()) > 1:
+                self._found_message_tool_kwarg = True
+                return ChatCompletionChunk(
+                    id=chunk.id,
+                    object=chunk.object,
+                    created=chunk.created.timestamp(),
+                    model=chunk.model,
+                    choices=[
+                        Choice(
+                            index=choice.index,
+                            delta=ChoiceDelta(),
+                            finish_reason="stop",
+                        )
+                    ],
+                )
+            else:
+                # If the parsed result is different
+                # This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
+                if parsed_args != self.current_json_parse_result:
+                    self.current_json_parse_result = parsed_args
+                    # If we can see a "message" field, return it as partial content
+                    if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
+                        return ChatCompletionChunk(
+                            id=chunk.id,
+                            object=chunk.object,
+                            created=chunk.created.timestamp(),
+                            model=chunk.model,
+                            choices=[
+                                Choice(
+                                    index=choice.index,
+                                    delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
+                                    finish_reason=None,
+                                )
+                            ],
+                        )
 
         # If there's a finish reason, pass that along
         if choice.finish_reason is not None:
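A standalone illustration (plain Python, not letta code) of the heuristic above: the streamed send_message arguments arrive as an incomplete JSON prefix, and as soon as an optimistic parse yields a second top-level key, the message kwarg is known to be complete:

# Successive best-effort parses of the accumulating tool-call arguments.
parsed_snapshots = [
    {"message": "Hi"},                                    # still streaming the message text
    {"message": "Hi there!"},                             # still streaming
    {"message": "Hi there!", "request_heartbeat": True},  # a second key appeared -> done
]

for parsed_args in parsed_snapshots:
    if len(parsed_args.keys()) > 1:
        print("send_message text finished; emit an empty delta with finish_reason='stop'")
    else:
        print("stream delta:", parsed_args["message"])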
letta/server/rest_api/interface.py
CHANGED
@@ -317,6 +317,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.debug = False
         self.timeout = 10 * 60  # 10 minute timeout
 
+        # for expect_reasoning_content, we should accumulate `content`
+        self.expect_reasoning_content_buffer = None
+
     def _reset_inner_thoughts_json_reader(self):
         # A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
         self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=self.inner_thoughts_kwarg, wait_for_first_key=True)
@@ -387,6 +390,39 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # Wipe the inner thoughts buffers
         self._reset_inner_thoughts_json_reader()
 
+        # If we were in reasoning mode and accumulated a json block, attempt to release it as chunks
+        # if self.expect_reasoning_content_buffer is not None:
+        #     try:
+        #         # NOTE: this is hardcoded for our DeepSeek API integration
+        #         json_reasoning_content = json.loads(self.expect_reasoning_content_buffer)
+
+        #         if "name" in json_reasoning_content:
+        #             self._push_to_buffer(
+        #                 ToolCallMessage(
+        #                     id=message_id,
+        #                     date=message_date,
+        #                     tool_call=ToolCallDelta(
+        #                         name=json_reasoning_content["name"],
+        #                         arguments=None,
+        #                         tool_call_id=None,
+        #                     ),
+        #                 )
+        #             )
+        #         if "arguments" in json_reasoning_content:
+        #             self._push_to_buffer(
+        #                 ToolCallMessage(
+        #                     id=message_id,
+        #                     date=message_date,
+        #                     tool_call=ToolCallDelta(
+        #                         name=None,
+        #                         arguments=json_reasoning_content["arguments"],
+        #                         tool_call_id=None,
+        #                     ),
+        #                 )
+        #             )
+        #     except Exception as e:
+        #         print(f"Failed to interpret reasoning content ({self.expect_reasoning_content_buffer}) as JSON: {e}")
+
     def step_complete(self):
         """Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
         if not self.multi_step:
@@ -410,7 +446,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             return
 
     def _process_chunk_to_letta_style(
-        self,
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        # if we expect `reasoning_content``, then that's what gets mapped to ReasoningMessage
+        # and `content` needs to be handled outside the interface
+        expect_reasoning_content: bool = False,
     ) -> Optional[Union[ReasoningMessage, ToolCallMessage, AssistantMessage]]:
         """
         Example data from non-streaming response looks like:
@@ -426,6 +468,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         if (
             message_delta.content is None
+            and (expect_reasoning_content and message_delta.reasoning_content is None)
             and message_delta.tool_calls is None
             and message_delta.function_call is None
             and choice.finish_reason is None
@@ -435,17 +478,68 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             return None
 
         # inner thoughts
-        if message_delta.
-
-
-
+        if expect_reasoning_content and message_delta.reasoning_content is not None:
+            processed_chunk = ReasoningMessage(
+                id=message_id,
+                date=message_date,
+                reasoning=message_delta.reasoning_content,
+            )
+        elif expect_reasoning_content and message_delta.content is not None:
+            # "ignore" content if we expect reasoning content
+            if self.expect_reasoning_content_buffer is None:
+                self.expect_reasoning_content_buffer = message_delta.content
             else:
-
+                self.expect_reasoning_content_buffer += message_delta.content
+
+            # we expect this to be pure JSON
+            # OptimisticJSONParser
+
+            # If we can pull a name out, pull it
+
+            try:
+                # NOTE: this is hardcoded for our DeepSeek API integration
+                json_reasoning_content = json.loads(self.expect_reasoning_content_buffer)
+                print(f"json_reasoning_content: {json_reasoning_content}")
+
+                processed_chunk = ToolCallMessage(
                     id=message_id,
                     date=message_date,
-
+                    tool_call=ToolCallDelta(
+                        name=json_reasoning_content.get("name"),
+                        arguments=json.dumps(json_reasoning_content.get("arguments")),
+                        tool_call_id=None,
+                    ),
                 )
 
+            except json.JSONDecodeError as e:
+                print(f"Failed to interpret reasoning content ({self.expect_reasoning_content_buffer}) as JSON: {e}")
+
+                return None
+            # Else,
+            # return None
+            # processed_chunk = ToolCallMessage(
+            #     id=message_id,
+            #     date=message_date,
+            #     tool_call=ToolCallDelta(
+            #         # name=tool_call_delta.get("name"),
+            #         name=None,
+            #         arguments=message_delta.content,
+            #         # tool_call_id=tool_call_delta.get("id"),
+            #         tool_call_id=None,
+            #     ),
+            # )
+            # return processed_chunk
+
+            # TODO eventually output as tool call outputs?
+            # print(f"Hiding content delta stream: '{message_delta.content}'")
+            # return None
+        elif message_delta.content is not None:
+            processed_chunk = ReasoningMessage(
+                id=message_id,
+                date=message_date,
+                reasoning=message_delta.content,
+            )
+
         # tool calls
         elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
             tool_call = message_delta.tool_calls[0]
@@ -890,7 +984,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         return processed_chunk
 
-    def process_chunk(
+    def process_chunk(
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        expect_reasoning_content: bool = False,
+    ):
         """Process a streaming chunk from an OpenAI-compatible server.
 
         Example data from non-streaming response looks like:
@@ -910,7 +1010,12 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # processed_chunk = self._process_chunk_to_openai_style(chunk)
             raise NotImplementedError("OpenAI proxy streaming temporarily disabled")
         else:
-            processed_chunk = self._process_chunk_to_letta_style(
+            processed_chunk = self._process_chunk_to_letta_style(
+                chunk=chunk,
+                message_id=message_id,
+                message_date=message_date,
+                expect_reasoning_content=expect_reasoning_content,
+            )
             if processed_chunk is None:
                 return
 
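A standalone sketch (not letta code) of the DeepSeek-specific branch above: reasoning_content deltas stream straight through as reasoning, while content deltas are accumulated until the buffer parses as a complete JSON tool call:

import json

# Simulated `content` deltas from a DeepSeek-style stream; the tool call arrives as plain JSON text.
deltas = ['{"name": "send_message", ', '"arguments": {"message": "Hello!"}}']

buffer = None
for delta in deltas:
    buffer = delta if buffer is None else buffer + delta
    try:
        call = json.loads(buffer)
    except json.JSONDecodeError:
        continue  # keep accumulating until the JSON is complete
    print("tool call:", call.get("name"), json.dumps(call.get("arguments")))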