letta-nightly 0.6.27.dev20250220104103__py3-none-any.whl → 0.6.29.dev20250221033538__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.
Files changed (66)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +19 -2
  3. letta/client/client.py +2 -0
  4. letta/constants.py +2 -0
  5. letta/functions/schema_generator.py +6 -6
  6. letta/helpers/converters.py +153 -0
  7. letta/helpers/tool_rule_solver.py +11 -1
  8. letta/llm_api/anthropic.py +10 -5
  9. letta/llm_api/aws_bedrock.py +1 -1
  10. letta/llm_api/deepseek.py +303 -0
  11. letta/llm_api/helpers.py +20 -10
  12. letta/llm_api/llm_api_tools.py +85 -2
  13. letta/llm_api/openai.py +16 -1
  14. letta/local_llm/chat_completion_proxy.py +15 -2
  15. letta/local_llm/lmstudio/api.py +75 -1
  16. letta/orm/__init__.py +2 -0
  17. letta/orm/agent.py +11 -4
  18. letta/orm/custom_columns.py +31 -110
  19. letta/orm/identities_agents.py +13 -0
  20. letta/orm/identity.py +60 -0
  21. letta/orm/organization.py +2 -0
  22. letta/orm/sqlalchemy_base.py +4 -0
  23. letta/schemas/agent.py +11 -1
  24. letta/schemas/identity.py +67 -0
  25. letta/schemas/llm_config.py +2 -0
  26. letta/schemas/message.py +1 -1
  27. letta/schemas/openai/chat_completion_response.py +2 -0
  28. letta/schemas/providers.py +72 -1
  29. letta/schemas/tool_rule.py +9 -1
  30. letta/serialize_schemas/__init__.py +1 -0
  31. letta/serialize_schemas/agent.py +36 -0
  32. letta/serialize_schemas/base.py +12 -0
  33. letta/serialize_schemas/custom_fields.py +69 -0
  34. letta/serialize_schemas/message.py +15 -0
  35. letta/server/db.py +111 -0
  36. letta/server/rest_api/app.py +8 -0
  37. letta/server/rest_api/chat_completions_interface.py +45 -21
  38. letta/server/rest_api/interface.py +114 -9
  39. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +98 -24
  40. letta/server/rest_api/routers/v1/__init__.py +2 -0
  41. letta/server/rest_api/routers/v1/agents.py +14 -3
  42. letta/server/rest_api/routers/v1/identities.py +121 -0
  43. letta/server/rest_api/utils.py +183 -4
  44. letta/server/server.py +23 -117
  45. letta/services/agent_manager.py +53 -6
  46. letta/services/block_manager.py +1 -1
  47. letta/services/identity_manager.py +156 -0
  48. letta/services/job_manager.py +1 -1
  49. letta/services/message_manager.py +1 -1
  50. letta/services/organization_manager.py +1 -1
  51. letta/services/passage_manager.py +1 -1
  52. letta/services/provider_manager.py +1 -1
  53. letta/services/sandbox_config_manager.py +1 -1
  54. letta/services/source_manager.py +1 -1
  55. letta/services/step_manager.py +1 -1
  56. letta/services/tool_manager.py +1 -1
  57. letta/services/user_manager.py +1 -1
  58. letta/settings.py +3 -0
  59. letta/streaming_interface.py +6 -2
  60. letta/tracing.py +205 -0
  61. letta/utils.py +4 -0
  62. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/METADATA +9 -2
  63. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/RECORD +66 -52
  64. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/LICENSE +0 -0
  65. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/WHEEL +0 -0
  66. {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/entry_points.txt +0 -0
letta/schemas/providers.py
@@ -211,6 +211,75 @@ class OpenAIProvider(Provider):
             return None


+class DeepSeekProvider(OpenAIProvider):
+    """
+    DeepSeek ChatCompletions API is similar to OpenAI's reasoning API,
+    but with slight differences:
+    * For example, DeepSeek's API requires perfect interleaving of user/assistant
+    * It also does not support native function calling
+    """
+
+    name: str = "deepseek"
+    base_url: str = Field("https://api.deepseek.com/v1", description="Base URL for the DeepSeek API.")
+    api_key: str = Field(..., description="API key for the DeepSeek API.")
+
+    def get_model_context_window_size(self, model_name: str) -> Optional[int]:
+        # DeepSeek doesn't return context window in the model listing,
+        # so these are hardcoded from their website
+        if model_name == "deepseek-reasoner":
+            return 64000
+        elif model_name == "deepseek-chat":
+            return 64000
+        else:
+            return None
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+
+        if "data" in response:
+            data = response["data"]
+        else:
+            data = response
+
+        configs = []
+        for model in data:
+            assert "id" in model, f"DeepSeek model missing 'id' field: {model}"
+            model_name = model["id"]
+
+            # In case DeepSeek starts supporting it in the future:
+            if "context_length" in model:
+                # Context length is returned in OpenRouter as "context_length"
+                context_window_size = model["context_length"]
+            else:
+                context_window_size = self.get_model_context_window_size(model_name)
+
+            if not context_window_size:
+                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                continue
+
+            # Not used for deepseek-reasoner, but otherwise is true
+            put_inner_thoughts_in_kwargs = False if model_name == "deepseek-reasoner" else True
+
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="deepseek",
+                    model_endpoint=self.base_url,
+                    context_window=context_window_size,
+                    handle=self.get_handle(model_name),
+                    put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+                )
+            )
+
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        # No embeddings supported
+        return []
+
+
 class LMStudioOpenAIProvider(OpenAIProvider):
     name: str = "lmstudio-openai"
     base_url: str = Field(..., description="Base URL for the LMStudio OpenAI API.")
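The new provider plugs into the existing OpenAI-style provider interface. A minimal usage sketch, not taken from the diff, assuming the class is importable from letta.schemas.providers (as the hunk context suggests) and that DEEPSEEK_API_KEY holds a valid key:

import os

from letta.schemas.providers import DeepSeekProvider

provider = DeepSeekProvider(api_key=os.environ["DEEPSEEK_API_KEY"])

# context windows are hardcoded above, since DeepSeek's model listing omits them
print(provider.get_model_context_window_size("deepseek-chat"))  # 64000

for config in provider.list_llm_models():
    # each entry is an LLMConfig with model_endpoint_type="deepseek";
    # deepseek-reasoner keeps put_inner_thoughts_in_kwargs=False
    print(config.model, config.context_window, config.put_inner_thoughts_in_kwargs)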
@@ -945,4 +1014,6 @@ class AnthropicBedrockProvider(Provider):
         return bedrock_get_model_context_window(model_name)

     def get_handle(self, model_name: str) -> str:
-        return f"anthropic/{model_name}"
+        print(model_name)
+        model = model_name.split(".")[-1]
+        return f"bedrock/{model}"
letta/schemas/tool_rule.py
@@ -48,7 +48,15 @@ class TerminalToolRule(BaseToolRule):
     type: Literal[ToolRuleType.exit_loop] = ToolRuleType.exit_loop


+class ContinueToolRule(BaseToolRule):
+    """
+    Represents a tool rule configuration where if this tool gets called, it must continue the agent loop.
+    """
+
+    type: Literal[ToolRuleType.continue_loop] = ToolRuleType.continue_loop
+
+
 ToolRule = Annotated[
-    Union[ChildToolRule, InitToolRule, TerminalToolRule, ConditionalToolRule],
+    Union[ChildToolRule, InitToolRule, TerminalToolRule, ConditionalToolRule, ContinueToolRule],
     Field(discriminator="type"),
 ]
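A sketch of how the new rule might be combined with the existing ones; it assumes BaseToolRule exposes a tool_name field like the other rule types (not shown in this hunk), and the tool names are only examples:

from letta.schemas.tool_rule import ContinueToolRule, TerminalToolRule

tool_rules = [
    # after core_memory_append is called, keep the agent loop going
    ContinueToolRule(tool_name="core_memory_append"),
    # send_message still exits the loop
    TerminalToolRule(tool_name="send_message"),
]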
letta/serialize_schemas/__init__.py ADDED
@@ -0,0 +1 @@
+from letta.serialize_schemas.agent import SerializedAgentSchema
letta/serialize_schemas/agent.py ADDED
@@ -0,0 +1,36 @@
+from marshmallow import fields
+
+from letta.orm import Agent
+from letta.serialize_schemas.base import BaseSchema
+from letta.serialize_schemas.custom_fields import EmbeddingConfigField, LLMConfigField, ToolRulesField
+from letta.serialize_schemas.message import SerializedMessageSchema
+
+
+class SerializedAgentSchema(BaseSchema):
+    """
+    Marshmallow schema for serializing/deserializing Agent objects.
+    Excludes relational fields.
+    """
+
+    llm_config = LLMConfigField()
+    embedding_config = EmbeddingConfigField()
+    tool_rules = ToolRulesField()
+
+    messages = fields.List(fields.Nested(SerializedMessageSchema))
+
+    def __init__(self, *args, session=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        if session:
+            self.session = session
+
+            # propagate session to nested schemas
+            for field_name, field_obj in self.fields.items():
+                if isinstance(field_obj, fields.List) and hasattr(field_obj.inner, "schema"):
+                    field_obj.inner.schema.session = session
+                elif hasattr(field_obj, "schema"):
+                    field_obj.schema.session = session
+
+    class Meta(BaseSchema.Meta):
+        model = Agent
+        # TODO: Serialize these as well...
+        exclude = ("tools", "sources", "core_memory", "tags", "source_passages", "agent_passages", "organization")
letta/serialize_schemas/base.py ADDED
@@ -0,0 +1,12 @@
+from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
+
+
+class BaseSchema(SQLAlchemyAutoSchema):
+    """
+    Base schema for all SQLAlchemy models.
+    This ensures all schemas share the same session.
+    """
+
+    class Meta:
+        include_relationships = True
+        load_instance = True
letta/serialize_schemas/custom_fields.py ADDED
@@ -0,0 +1,69 @@
+from marshmallow import fields
+
+from letta.helpers.converters import (
+    deserialize_embedding_config,
+    deserialize_llm_config,
+    deserialize_tool_calls,
+    deserialize_tool_rules,
+    serialize_embedding_config,
+    serialize_llm_config,
+    serialize_tool_calls,
+    serialize_tool_rules,
+)
+
+
+class PydanticField(fields.Field):
+    """Generic Marshmallow field for handling Pydantic models."""
+
+    def __init__(self, pydantic_class, **kwargs):
+        self.pydantic_class = pydantic_class
+        super().__init__(**kwargs)
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return value.model_dump() if value else None
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return self.pydantic_class(**value) if value else None
+
+
+class LLMConfigField(fields.Field):
+    """Marshmallow field for handling LLMConfig serialization."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_llm_config(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_llm_config(value)
+
+
+class EmbeddingConfigField(fields.Field):
+    """Marshmallow field for handling EmbeddingConfig serialization."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_embedding_config(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_embedding_config(value)
+
+
+class ToolRulesField(fields.List):
+    """Custom Marshmallow field to handle a list of ToolRules."""
+
+    def __init__(self, **kwargs):
+        super().__init__(fields.Dict(), **kwargs)
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_tool_rules(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_tool_rules(value)
+
+
+class ToolCallField(fields.Field):
+    """Marshmallow field for handling a list of OpenAI ToolCall objects."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_tool_calls(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_tool_calls(value)
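A toy sketch of the generic PydanticField above, using a stand-in Pydantic model invented for this example (the letta configs use the dedicated fields instead):

from pydantic import BaseModel

from letta.serialize_schemas.custom_fields import PydanticField


class Point(BaseModel):  # hypothetical model, not part of letta
    x: int
    y: int


field = PydanticField(Point)
print(field._serialize(Point(x=1, y=2), "point", None))     # {'x': 1, 'y': 2}
print(field._deserialize({"x": 1, "y": 2}, "point", None))  # Point(x=1, y=2)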
letta/serialize_schemas/message.py ADDED
@@ -0,0 +1,15 @@
+from letta.orm.message import Message
+from letta.serialize_schemas.base import BaseSchema
+from letta.serialize_schemas.custom_fields import ToolCallField
+
+
+class SerializedMessageSchema(BaseSchema):
+    """
+    Marshmallow schema for serializing/deserializing Message objects.
+    """
+
+    tool_calls = ToolCallField()
+
+    class Meta(BaseSchema.Meta):
+        model = Message
+        exclude = ("step", "job_message")
letta/server/db.py ADDED
@@ -0,0 +1,111 @@
+import os
+from contextlib import contextmanager
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+from letta.config import LettaConfig
+from letta.log import get_logger
+from letta.orm import Base
+
+# NOTE: hack to see if single session management works
+from letta.settings import settings
+
+config = LettaConfig.load()
+
+logger = get_logger(__name__)
+
+
+def print_sqlite_schema_error():
+    """Print a formatted error message for SQLite schema issues"""
+    console = Console()
+    error_text = Text()
+    error_text.append("Existing SQLite DB schema is invalid, and schema migrations are not supported for SQLite. ", style="bold red")
+    error_text.append("To have migrations supported between Letta versions, please run Letta with Docker (", style="white")
+    error_text.append("https://docs.letta.com/server/docker", style="blue underline")
+    error_text.append(") or use Postgres by setting ", style="white")
+    error_text.append("LETTA_PG_URI", style="yellow")
+    error_text.append(".\n\n", style="white")
+    error_text.append("If you wish to keep using SQLite, you can reset your database by removing the DB file with ", style="white")
+    error_text.append("rm ~/.letta/sqlite.db", style="yellow")
+    error_text.append(" or downgrade to your previous version of Letta.", style="white")
+
+    console.print(Panel(error_text, border_style="red"))
+
+
+@contextmanager
+def db_error_handler():
+    """Context manager for handling database errors"""
+    try:
+        yield
+    except Exception as e:
+        # Handle other SQLAlchemy errors
+        print(e)
+        print_sqlite_schema_error()
+        # raise ValueError(f"SQLite DB error: {str(e)}")
+        exit(1)
+
+
+if settings.letta_pg_uri_no_default:
+    print("Creating postgres engine")
+    config.recall_storage_type = "postgres"
+    config.recall_storage_uri = settings.letta_pg_uri_no_default
+    config.archival_storage_type = "postgres"
+    config.archival_storage_uri = settings.letta_pg_uri_no_default
+
+    # create engine
+    engine = create_engine(
+        settings.letta_pg_uri,
+        pool_size=settings.pg_pool_size,
+        max_overflow=settings.pg_max_overflow,
+        pool_timeout=settings.pg_pool_timeout,
+        pool_recycle=settings.pg_pool_recycle,
+        echo=settings.pg_echo,
+    )
+else:
+    # TODO: don't rely on config storage
+    engine_path = "sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db")
+    logger.info("Creating sqlite engine " + engine_path)
+
+    engine = create_engine(engine_path)
+
+    # Store the original connect method
+    original_connect = engine.connect
+
+    def wrapped_connect(*args, **kwargs):
+        with db_error_handler():
+            # Get the connection
+            connection = original_connect(*args, **kwargs)
+
+            # Store the original execution method
+            original_execute = connection.execute
+
+            # Wrap the execute method of the connection
+            def wrapped_execute(*args, **kwargs):
+                with db_error_handler():
+                    return original_execute(*args, **kwargs)
+
+            # Replace the connection's execute method
+            connection.execute = wrapped_execute
+
+            return connection
+
+    # Replace the engine's connect method
+    engine.connect = wrapped_connect
+
+    Base.metadata.create_all(bind=engine)
+
+
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+db_context = contextmanager(get_db)
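The module builds a single engine at import time (Postgres when LETTA_PG_URI is set, otherwise the wrapped SQLite engine) and exposes get_db for dependency injection plus db_context as a context manager. A minimal usage sketch, not from the diff:

from sqlalchemy import text

from letta.server.db import db_context

with db_context() as session:
    # a plain SQLAlchemy Session bound to whichever engine was selected above
    print(session.execute(text("SELECT 1")).scalar())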
letta/server/rest_api/app.py
@@ -231,6 +231,14 @@ def create_application() -> "FastAPI":
         allow_headers=["*"],
     )

+    # Set up OpenTelemetry tracing
+    endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+    if endpoint:
+        print(f"▶ Using OTLP tracing with endpoint: {endpoint}")
+        from letta.tracing import setup_tracing
+
+        setup_tracing(endpoint=endpoint, service_name="memgpt-server")
+
     for route in v1_routes:
         app.include_router(route, prefix=API_PREFIX)
         # this gives undocumented routes for "latest" and bare api calls.
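Tracing is opt-in through the standard OTLP environment variable; when it is unset, create_application behaves as before. A sketch with a placeholder collector address (the endpoint value is an assumption, not from the diff):

import os

# hypothetical local collector endpoint
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4317"

from letta.server.rest_api.app import create_application

app = create_application()  # prints the endpoint and calls setup_tracing(service_name="memgpt-server")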
letta/server/rest_api/chat_completions_interface.py
@@ -56,6 +56,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         self.current_function_name = ""
         self.current_function_arguments = []
         self.current_json_parse_result = {}
+        self._found_message_tool_kwarg = False

         # Internal chunk buffer and event for async notification
         self._chunks = deque()
@@ -153,12 +154,13 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """No-op retained for interface compatibility."""
         return

-    def process_chunk(self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime) -> None:
+    def process_chunk(
+        self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime, expect_reasoning_content: bool = False
+    ) -> None:
         """
         Called externally with a ChatCompletionChunkResponse. Transforms
         it if necessary, then enqueues partial messages for streaming back.
         """
-        # print("RECEIVED CHUNK...")
         processed_chunk = self._process_chunk_to_openai_style(chunk)
         if processed_chunk is not None:
             self._push_to_buffer(processed_chunk)
@@ -197,6 +199,10 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         content (especially from a 'send_message' tool) is exposed as text
         deltas in 'content'. Otherwise, pass through or yield finish reasons.
         """
+        # If we've already sent the final chunk, ignore everything.
+        if self._found_message_tool_kwarg:
+            return None
+
         choice = chunk.choices[0]
         delta = choice.delta

@@ -219,25 +225,43 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
             combined_args = "".join(self.current_function_arguments)
             parsed_args = OptimisticJSONParser().parse(combined_args)

-            # If the parsed result is different
-            # This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
-            if parsed_args != self.current_json_parse_result:
-                self.current_json_parse_result = parsed_args
-                # If we can see a "message" field, return it as partial content
-                if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
-                    return ChatCompletionChunk(
-                        id=chunk.id,
-                        object=chunk.object,
-                        created=chunk.created.timestamp(),
-                        model=chunk.model,
-                        choices=[
-                            Choice(
-                                index=choice.index,
-                                delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
-                                finish_reason=None,
-                            )
-                        ],
-                    )
+            # TODO: Make this less brittle! This depends on `message` coming first!
+            # This is a heuristic we use to know if we're done with the `message` part of `send_message`
+            if len(parsed_args.keys()) > 1:
+                self._found_message_tool_kwarg = True
+                return ChatCompletionChunk(
+                    id=chunk.id,
+                    object=chunk.object,
+                    created=chunk.created.timestamp(),
+                    model=chunk.model,
+                    choices=[
+                        Choice(
+                            index=choice.index,
+                            delta=ChoiceDelta(),
+                            finish_reason="stop",
+                        )
+                    ],
+                )
+            else:
+                # If the parsed result is different
+                # This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
+                if parsed_args != self.current_json_parse_result:
+                    self.current_json_parse_result = parsed_args
+                    # If we can see a "message" field, return it as partial content
+                    if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
+                        return ChatCompletionChunk(
+                            id=chunk.id,
+                            object=chunk.object,
+                            created=chunk.created.timestamp(),
+                            model=chunk.model,
+                            choices=[
+                                Choice(
+                                    index=choice.index,
+                                    delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
+                                    finish_reason=None,
+                                )
+                            ],
+                        )

         # If there's a finish reason, pass that along
         if choice.finish_reason is not None:
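A toy illustration of the heuristic above, using complete JSON in place of the streaming OptimisticJSONParser: once any key after the assistant-message kwarg appears in the parsed arguments, the interface marks the message finished and emits a chunk with finish_reason="stop". The request_heartbeat key below is only an example of such a trailing kwarg:

import json

partial = json.loads('{"message": "Hi there!"}')
complete = json.loads('{"message": "Hi there!", "request_heartbeat": true}')

print(len(partial.keys()) > 1)   # False -> keep streaming content deltas
print(len(complete.keys()) > 1)  # True  -> emit the final stop chunk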
letta/server/rest_api/interface.py
@@ -317,6 +317,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.debug = False
         self.timeout = 10 * 60 # 10 minute timeout

+        # for expect_reasoning_content, we should accumulate `content`
+        self.expect_reasoning_content_buffer = None
+
     def _reset_inner_thoughts_json_reader(self):
         # A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
         self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=self.inner_thoughts_kwarg, wait_for_first_key=True)
@@ -387,6 +390,39 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # Wipe the inner thoughts buffers
         self._reset_inner_thoughts_json_reader()

+        # If we were in reasoning mode and accumulated a json block, attempt to release it as chunks
+        # if self.expect_reasoning_content_buffer is not None:
+        #     try:
+        #         # NOTE: this is hardcoded for our DeepSeek API integration
+        #         json_reasoning_content = json.loads(self.expect_reasoning_content_buffer)
+
+        #         if "name" in json_reasoning_content:
+        #             self._push_to_buffer(
+        #                 ToolCallMessage(
+        #                     id=message_id,
+        #                     date=message_date,
+        #                     tool_call=ToolCallDelta(
+        #                         name=json_reasoning_content["name"],
+        #                         arguments=None,
+        #                         tool_call_id=None,
+        #                     ),
+        #                 )
+        #             )
+        #         if "arguments" in json_reasoning_content:
+        #             self._push_to_buffer(
+        #                 ToolCallMessage(
+        #                     id=message_id,
+        #                     date=message_date,
+        #                     tool_call=ToolCallDelta(
+        #                         name=None,
+        #                         arguments=json_reasoning_content["arguments"],
+        #                         tool_call_id=None,
+        #                     ),
+        #                 )
+        #             )
+        #     except Exception as e:
+        #         print(f"Failed to interpret reasoning content ({self.expect_reasoning_content_buffer}) as JSON: {e}")
+
     def step_complete(self):
         """Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
         if not self.multi_step:
@@ -410,7 +446,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         return

     def _process_chunk_to_letta_style(
-        self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        # if we expect `reasoning_content``, then that's what gets mapped to ReasoningMessage
+        # and `content` needs to be handled outside the interface
+        expect_reasoning_content: bool = False,
     ) -> Optional[Union[ReasoningMessage, ToolCallMessage, AssistantMessage]]:
         """
         Example data from non-streaming response looks like:
@@ -426,6 +468,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):

         if (
             message_delta.content is None
+            and (expect_reasoning_content and message_delta.reasoning_content is None)
             and message_delta.tool_calls is None
             and message_delta.function_call is None
             and choice.finish_reason is None
@@ -435,17 +478,68 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             return None

         # inner thoughts
-        if message_delta.content is not None:
-            if message_delta.content == "":
-                print("skipping empty content")
-                processed_chunk = None
+        if expect_reasoning_content and message_delta.reasoning_content is not None:
+            processed_chunk = ReasoningMessage(
+                id=message_id,
+                date=message_date,
+                reasoning=message_delta.reasoning_content,
+            )
+        elif expect_reasoning_content and message_delta.content is not None:
+            # "ignore" content if we expect reasoning content
+            if self.expect_reasoning_content_buffer is None:
+                self.expect_reasoning_content_buffer = message_delta.content
             else:
-                processed_chunk = ReasoningMessage(
+                self.expect_reasoning_content_buffer += message_delta.content
+
+            # we expect this to be pure JSON
+            # OptimisticJSONParser
+
+            # If we can pull a name out, pull it
+
+            try:
+                # NOTE: this is hardcoded for our DeepSeek API integration
+                json_reasoning_content = json.loads(self.expect_reasoning_content_buffer)
+                print(f"json_reasoning_content: {json_reasoning_content}")
+
+                processed_chunk = ToolCallMessage(
                     id=message_id,
                     date=message_date,
-                    reasoning=message_delta.content,
+                    tool_call=ToolCallDelta(
+                        name=json_reasoning_content.get("name"),
+                        arguments=json.dumps(json_reasoning_content.get("arguments")),
+                        tool_call_id=None,
+                    ),
                 )

+            except json.JSONDecodeError as e:
+                print(f"Failed to interpret reasoning content ({self.expect_reasoning_content_buffer}) as JSON: {e}")
+
+                return None
+            # Else,
+            # return None
+            # processed_chunk = ToolCallMessage(
+            #     id=message_id,
+            #     date=message_date,
+            #     tool_call=ToolCallDelta(
+            #         # name=tool_call_delta.get("name"),
+            #         name=None,
+            #         arguments=message_delta.content,
+            #         # tool_call_id=tool_call_delta.get("id"),
+            #         tool_call_id=None,
+            #     ),
+            # )
+            # return processed_chunk
+
+            # TODO eventually output as tool call outputs?
+            # print(f"Hiding content delta stream: '{message_delta.content}'")
+            # return None
+        elif message_delta.content is not None:
+            processed_chunk = ReasoningMessage(
+                id=message_id,
+                date=message_date,
+                reasoning=message_delta.content,
+            )
+
         # tool calls
         elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
             tool_call = message_delta.tool_calls[0]
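A toy sketch of the DeepSeek-specific mapping above: reasoning_content deltas stream straight out as ReasoningMessage, while content is buffered until the whole buffer parses as a JSON tool call. The payload below is an invented example of such a buffer:

import json

buffered_content = '{"name": "send_message", "arguments": {"message": "Hello!"}}'

tool_call = json.loads(buffered_content)
print(tool_call.get("name"))                   # send_message
print(json.dumps(tool_call.get("arguments")))  # {"message": "Hello!"}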
@@ -890,7 +984,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):

         return processed_chunk

-    def process_chunk(self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime):
+    def process_chunk(
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        expect_reasoning_content: bool = False,
+    ):
         """Process a streaming chunk from an OpenAI-compatible server.

         Example data from non-streaming response looks like:
@@ -910,7 +1010,12 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # processed_chunk = self._process_chunk_to_openai_style(chunk)
             raise NotImplementedError("OpenAI proxy streaming temporarily disabled")
         else:
-            processed_chunk = self._process_chunk_to_letta_style(chunk=chunk, message_id=message_id, message_date=message_date)
+            processed_chunk = self._process_chunk_to_letta_style(
+                chunk=chunk,
+                message_id=message_id,
+                message_date=message_date,
+                expect_reasoning_content=expect_reasoning_content,
+            )
             if processed_chunk is None:
                 return