letta-nightly 0.6.27.dev20250220104103__py3-none-any.whl → 0.6.29.dev20250221033538__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +19 -2
- letta/client/client.py +2 -0
- letta/constants.py +2 -0
- letta/functions/schema_generator.py +6 -6
- letta/helpers/converters.py +153 -0
- letta/helpers/tool_rule_solver.py +11 -1
- letta/llm_api/anthropic.py +10 -5
- letta/llm_api/aws_bedrock.py +1 -1
- letta/llm_api/deepseek.py +303 -0
- letta/llm_api/helpers.py +20 -10
- letta/llm_api/llm_api_tools.py +85 -2
- letta/llm_api/openai.py +16 -1
- letta/local_llm/chat_completion_proxy.py +15 -2
- letta/local_llm/lmstudio/api.py +75 -1
- letta/orm/__init__.py +2 -0
- letta/orm/agent.py +11 -4
- letta/orm/custom_columns.py +31 -110
- letta/orm/identities_agents.py +13 -0
- letta/orm/identity.py +60 -0
- letta/orm/organization.py +2 -0
- letta/orm/sqlalchemy_base.py +4 -0
- letta/schemas/agent.py +11 -1
- letta/schemas/identity.py +67 -0
- letta/schemas/llm_config.py +2 -0
- letta/schemas/message.py +1 -1
- letta/schemas/openai/chat_completion_response.py +2 -0
- letta/schemas/providers.py +72 -1
- letta/schemas/tool_rule.py +9 -1
- letta/serialize_schemas/__init__.py +1 -0
- letta/serialize_schemas/agent.py +36 -0
- letta/serialize_schemas/base.py +12 -0
- letta/serialize_schemas/custom_fields.py +69 -0
- letta/serialize_schemas/message.py +15 -0
- letta/server/db.py +111 -0
- letta/server/rest_api/app.py +8 -0
- letta/server/rest_api/chat_completions_interface.py +45 -21
- letta/server/rest_api/interface.py +114 -9
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +98 -24
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +14 -3
- letta/server/rest_api/routers/v1/identities.py +121 -0
- letta/server/rest_api/utils.py +183 -4
- letta/server/server.py +23 -117
- letta/services/agent_manager.py +53 -6
- letta/services/block_manager.py +1 -1
- letta/services/identity_manager.py +156 -0
- letta/services/job_manager.py +1 -1
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +1 -1
- letta/services/step_manager.py +1 -1
- letta/services/tool_manager.py +1 -1
- letta/services/user_manager.py +1 -1
- letta/settings.py +3 -0
- letta/streaming_interface.py +6 -2
- letta/tracing.py +205 -0
- letta/utils.py +4 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/METADATA +9 -2
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/RECORD +66 -52
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/entry_points.txt +0 -0
letta/schemas/providers.py
CHANGED
@@ -211,6 +211,75 @@ class OpenAIProvider(Provider):
         return None
 
 
+class DeepSeekProvider(OpenAIProvider):
+    """
+    DeepSeek ChatCompletions API is similar to OpenAI's reasoning API,
+    but with slight differences:
+    * For example, DeepSeek's API requires perfect interleaving of user/assistant
+    * It also does not support native function calling
+    """
+
+    name: str = "deepseek"
+    base_url: str = Field("https://api.deepseek.com/v1", description="Base URL for the DeepSeek API.")
+    api_key: str = Field(..., description="API key for the DeepSeek API.")
+
+    def get_model_context_window_size(self, model_name: str) -> Optional[int]:
+        # DeepSeek doesn't return context window in the model listing,
+        # so these are hardcoded from their website
+        if model_name == "deepseek-reasoner":
+            return 64000
+        elif model_name == "deepseek-chat":
+            return 64000
+        else:
+            return None
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+
+        if "data" in response:
+            data = response["data"]
+        else:
+            data = response
+
+        configs = []
+        for model in data:
+            assert "id" in model, f"DeepSeek model missing 'id' field: {model}"
+            model_name = model["id"]
+
+            # In case DeepSeek starts supporting it in the future:
+            if "context_length" in model:
+                # Context length is returned in OpenRouter as "context_length"
+                context_window_size = model["context_length"]
+            else:
+                context_window_size = self.get_model_context_window_size(model_name)
+
+            if not context_window_size:
+                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                continue
+
+            # Not used for deepseek-reasoner, but otherwise is true
+            put_inner_thoughts_in_kwargs = False if model_name == "deepseek-reasoner" else True
+
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="deepseek",
+                    model_endpoint=self.base_url,
+                    context_window=context_window_size,
+                    handle=self.get_handle(model_name),
+                    put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+                )
+            )
+
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        # No embeddings supported
+        return []
+
+
 class LMStudioOpenAIProvider(OpenAIProvider):
     name: str = "lmstudio-openai"
     base_url: str = Field(..., description="Base URL for the LMStudio OpenAI API.")
@@ -945,4 +1014,6 @@ class AnthropicBedrockProvider(Provider):
         return bedrock_get_model_context_window(model_name)
 
     def get_handle(self, model_name: str) -> str:
-
+        print(model_name)
+        model = model_name.split(".")[-1]
+        return f"bedrock/{model}"
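The new provider is a drop-in OpenAIProvider subclass, so listing its models follows the same pattern as the other providers. A minimal usage sketch (hypothetical, not part of the release; it assumes a valid key in the DEEPSEEK_API_KEY environment variable and makes a live call to DeepSeek's model listing):

import os

from letta.schemas.providers import DeepSeekProvider

# Hypothetical illustration: enumerate the LLMConfig entries the provider would register.
provider = DeepSeekProvider(api_key=os.environ["DEEPSEEK_API_KEY"])
for cfg in provider.list_llm_models():
    # e.g. deepseek-chat / deepseek-reasoner, both with the hardcoded 64000-token window
    print(cfg.model, cfg.context_window, cfg.put_inner_thoughts_in_kwargs)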
letta/schemas/tool_rule.py
CHANGED
@@ -48,7 +48,15 @@ class TerminalToolRule(BaseToolRule):
     type: Literal[ToolRuleType.exit_loop] = ToolRuleType.exit_loop
 
 
+class ContinueToolRule(BaseToolRule):
+    """
+    Represents a tool rule configuration where if this tool gets called, it must continue the agent loop.
+    """
+
+    type: Literal[ToolRuleType.continue_loop] = ToolRuleType.continue_loop
+
+
 ToolRule = Annotated[
-    Union[ChildToolRule, InitToolRule, TerminalToolRule, ConditionalToolRule],
+    Union[ChildToolRule, InitToolRule, TerminalToolRule, ConditionalToolRule, ContinueToolRule],
     Field(discriminator="type"),
 ]
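Because ContinueToolRule joins the ToolRule discriminated union, it round-trips through pydantic like the existing rule types. A minimal sketch (assuming pydantic v2 and that BaseToolRule carries the usual tool_name field, which is not shown in this hunk):

from pydantic import TypeAdapter

from letta.schemas.tool_rule import ContinueToolRule, ToolRule

# Direct construction; `tool_name` is assumed to be inherited from BaseToolRule.
rule = ContinueToolRule(tool_name="send_message")

# Round-trip through the union: the `type` discriminator selects ContinueToolRule again.
restored = TypeAdapter(ToolRule).validate_python(rule.model_dump())
assert isinstance(restored, ContinueToolRule)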
letta/serialize_schemas/__init__.py
ADDED
@@ -0,0 +1 @@
+from letta.serialize_schemas.agent import SerializedAgentSchema
letta/serialize_schemas/agent.py
ADDED
@@ -0,0 +1,36 @@
+from marshmallow import fields
+
+from letta.orm import Agent
+from letta.serialize_schemas.base import BaseSchema
+from letta.serialize_schemas.custom_fields import EmbeddingConfigField, LLMConfigField, ToolRulesField
+from letta.serialize_schemas.message import SerializedMessageSchema
+
+
+class SerializedAgentSchema(BaseSchema):
+    """
+    Marshmallow schema for serializing/deserializing Agent objects.
+    Excludes relational fields.
+    """
+
+    llm_config = LLMConfigField()
+    embedding_config = EmbeddingConfigField()
+    tool_rules = ToolRulesField()
+
+    messages = fields.List(fields.Nested(SerializedMessageSchema))
+
+    def __init__(self, *args, session=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        if session:
+            self.session = session
+
+            # propagate session to nested schemas
+            for field_name, field_obj in self.fields.items():
+                if isinstance(field_obj, fields.List) and hasattr(field_obj.inner, "schema"):
+                    field_obj.inner.schema.session = session
+                elif hasattr(field_obj, "schema"):
+                    field_obj.schema.session = session
+
+    class Meta(BaseSchema.Meta):
+        model = Agent
+        # TODO: Serialize these as well...
+        exclude = ("tools", "sources", "core_memory", "tags", "source_passages", "agent_passages", "organization")
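A rough usage sketch of the new serialization schemas (hypothetical; assumes a SQLAlchemy session from the new letta/server/db.py module and at least one Agent row in the database):

from letta.orm import Agent
from letta.serialize_schemas import SerializedAgentSchema
from letta.server.db import db_context

with db_context() as session:
    agent_orm = session.query(Agent).first()
    schema = SerializedAgentSchema(session=session)

    # Dump to a plain dict; llm_config, embedding_config, tool_rules and messages
    # are handled by the custom fields defined in custom_fields.py below.
    agent_dict = schema.dump(agent_orm)

    # Load back into an ORM instance (marshmallow-sqlalchemy's load_instance=True).
    restored = schema.load(agent_dict, session=session)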
letta/serialize_schemas/base.py
ADDED
@@ -0,0 +1,12 @@
+from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
+
+
+class BaseSchema(SQLAlchemyAutoSchema):
+    """
+    Base schema for all SQLAlchemy models.
+    This ensures all schemas share the same session.
+    """
+
+    class Meta:
+        include_relationships = True
+        load_instance = True
letta/serialize_schemas/custom_fields.py
ADDED
@@ -0,0 +1,69 @@
+from marshmallow import fields
+
+from letta.helpers.converters import (
+    deserialize_embedding_config,
+    deserialize_llm_config,
+    deserialize_tool_calls,
+    deserialize_tool_rules,
+    serialize_embedding_config,
+    serialize_llm_config,
+    serialize_tool_calls,
+    serialize_tool_rules,
+)
+
+
+class PydanticField(fields.Field):
+    """Generic Marshmallow field for handling Pydantic models."""
+
+    def __init__(self, pydantic_class, **kwargs):
+        self.pydantic_class = pydantic_class
+        super().__init__(**kwargs)
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return value.model_dump() if value else None
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return self.pydantic_class(**value) if value else None
+
+
+class LLMConfigField(fields.Field):
+    """Marshmallow field for handling LLMConfig serialization."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_llm_config(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_llm_config(value)
+
+
+class EmbeddingConfigField(fields.Field):
+    """Marshmallow field for handling EmbeddingConfig serialization."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_embedding_config(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_embedding_config(value)
+
+
+class ToolRulesField(fields.List):
+    """Custom Marshmallow field to handle a list of ToolRules."""
+
+    def __init__(self, **kwargs):
+        super().__init__(fields.Dict(), **kwargs)
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_tool_rules(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_tool_rules(value)
+
+
+class ToolCallField(fields.Field):
+    """Marshmallow field for handling a list of OpenAI ToolCall objects."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        return serialize_tool_calls(value)
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        return deserialize_tool_calls(value)
letta/serialize_schemas/message.py
ADDED
@@ -0,0 +1,15 @@
+from letta.orm.message import Message
+from letta.serialize_schemas.base import BaseSchema
+from letta.serialize_schemas.custom_fields import ToolCallField
+
+
+class SerializedMessageSchema(BaseSchema):
+    """
+    Marshmallow schema for serializing/deserializing Message objects.
+    """
+
+    tool_calls = ToolCallField()
+
+    class Meta(BaseSchema.Meta):
+        model = Message
+        exclude = ("step", "job_message")
letta/server/db.py
ADDED
@@ -0,0 +1,111 @@
+import os
+from contextlib import contextmanager
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+from letta.config import LettaConfig
+from letta.log import get_logger
+from letta.orm import Base
+
+# NOTE: hack to see if single session management works
+from letta.settings import settings
+
+config = LettaConfig.load()
+
+logger = get_logger(__name__)
+
+
+def print_sqlite_schema_error():
+    """Print a formatted error message for SQLite schema issues"""
+    console = Console()
+    error_text = Text()
+    error_text.append("Existing SQLite DB schema is invalid, and schema migrations are not supported for SQLite. ", style="bold red")
+    error_text.append("To have migrations supported between Letta versions, please run Letta with Docker (", style="white")
+    error_text.append("https://docs.letta.com/server/docker", style="blue underline")
+    error_text.append(") or use Postgres by setting ", style="white")
+    error_text.append("LETTA_PG_URI", style="yellow")
+    error_text.append(".\n\n", style="white")
+    error_text.append("If you wish to keep using SQLite, you can reset your database by removing the DB file with ", style="white")
+    error_text.append("rm ~/.letta/sqlite.db", style="yellow")
+    error_text.append(" or downgrade to your previous version of Letta.", style="white")
+
+    console.print(Panel(error_text, border_style="red"))
+
+
+@contextmanager
+def db_error_handler():
+    """Context manager for handling database errors"""
+    try:
+        yield
+    except Exception as e:
+        # Handle other SQLAlchemy errors
+        print(e)
+        print_sqlite_schema_error()
+        # raise ValueError(f"SQLite DB error: {str(e)}")
+        exit(1)
+
+
+if settings.letta_pg_uri_no_default:
+    print("Creating postgres engine")
+    config.recall_storage_type = "postgres"
+    config.recall_storage_uri = settings.letta_pg_uri_no_default
+    config.archival_storage_type = "postgres"
+    config.archival_storage_uri = settings.letta_pg_uri_no_default
+
+    # create engine
+    engine = create_engine(
+        settings.letta_pg_uri,
+        pool_size=settings.pg_pool_size,
+        max_overflow=settings.pg_max_overflow,
+        pool_timeout=settings.pg_pool_timeout,
+        pool_recycle=settings.pg_pool_recycle,
+        echo=settings.pg_echo,
+    )
+else:
+    # TODO: don't rely on config storage
+    engine_path = "sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db")
+    logger.info("Creating sqlite engine " + engine_path)
+
+    engine = create_engine(engine_path)
+
+    # Store the original connect method
+    original_connect = engine.connect
+
+    def wrapped_connect(*args, **kwargs):
+        with db_error_handler():
+            # Get the connection
+            connection = original_connect(*args, **kwargs)
+
+            # Store the original execution method
+            original_execute = connection.execute
+
+            # Wrap the execute method of the connection
+            def wrapped_execute(*args, **kwargs):
+                with db_error_handler():
+                    return original_execute(*args, **kwargs)
+
+            # Replace the connection's execute method
+            connection.execute = wrapped_execute
+
+            return connection
+
+    # Replace the engine's connect method
+    engine.connect = wrapped_connect
+
+Base.metadata.create_all(bind=engine)
+
+
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+db_context = contextmanager(get_db)
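The module also exposes simple session helpers. A small sketch of how they might be used (hypothetical; assumes the default SQLite configuration so the module-level engine can be created on import):

from sqlalchemy import text

from letta.server.db import db_context

# db_context wraps get_db() with contextmanager: it yields a Session bound to the
# module-level engine and guarantees close() on exit.
with db_context() as session:
    print(session.execute(text("SELECT 1")).scalar())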
letta/server/rest_api/app.py
CHANGED
@@ -231,6 +231,14 @@ def create_application() -> "FastAPI":
         allow_headers=["*"],
     )
 
+    # Set up OpenTelemetry tracing
+    endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+    if endpoint:
+        print(f"▶ Using OTLP tracing with endpoint: {endpoint}")
+        from letta.tracing import setup_tracing
+
+        setup_tracing(endpoint=endpoint, service_name="memgpt-server")
+
     for route in v1_routes:
         app.include_router(route, prefix=API_PREFIX)
     # this gives undocumented routes for "latest" and bare api calls.
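The tracing hook only runs when OTEL_EXPORTER_OTLP_ENDPOINT is set before create_application() is called. A hedged sketch of exercising it directly (assumes an OTLP collector is actually listening at the given address, which is a placeholder):

import os

# Assumed collector address; the server reads this variable at application creation time.
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4317"

from letta.tracing import setup_tracing

setup_tracing(endpoint=os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"], service_name="memgpt-server")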
letta/server/rest_api/chat_completions_interface.py
CHANGED
@@ -56,6 +56,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         self.current_function_name = ""
         self.current_function_arguments = []
         self.current_json_parse_result = {}
+        self._found_message_tool_kwarg = False
 
         # Internal chunk buffer and event for async notification
         self._chunks = deque()
@@ -153,12 +154,13 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """No-op retained for interface compatibility."""
         return
 
-    def process_chunk(
+    def process_chunk(
+        self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime, expect_reasoning_content: bool = False
+    ) -> None:
         """
         Called externally with a ChatCompletionChunkResponse. Transforms
         it if necessary, then enqueues partial messages for streaming back.
         """
-        # print("RECEIVED CHUNK...")
        processed_chunk = self._process_chunk_to_openai_style(chunk)
        if processed_chunk is not None:
            self._push_to_buffer(processed_chunk)
@@ -197,6 +199,10 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         content (especially from a 'send_message' tool) is exposed as text
         deltas in 'content'. Otherwise, pass through or yield finish reasons.
         """
+        # If we've already sent the final chunk, ignore everything.
+        if self._found_message_tool_kwarg:
+            return None
+
         choice = chunk.choices[0]
         delta = choice.delta
 
@@ -219,25 +225,43 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
             combined_args = "".join(self.current_function_arguments)
             parsed_args = OptimisticJSONParser().parse(combined_args)
 
-            #
-            # This is
-            if parsed_args
-                self.
(the remaining removed lines, 226-240 on the old side, are truncated in this diff view)
+            # TODO: Make this less brittle! This depends on `message` coming first!
+            # This is a heuristic we use to know if we're done with the `message` part of `send_message`
+            if len(parsed_args.keys()) > 1:
+                self._found_message_tool_kwarg = True
+                return ChatCompletionChunk(
+                    id=chunk.id,
+                    object=chunk.object,
+                    created=chunk.created.timestamp(),
+                    model=chunk.model,
+                    choices=[
+                        Choice(
+                            index=choice.index,
+                            delta=ChoiceDelta(),
+                            finish_reason="stop",
+                        )
+                    ],
+                )
+            else:
+                # If the parsed result is different
+                # This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
+                if parsed_args != self.current_json_parse_result:
+                    self.current_json_parse_result = parsed_args
+                    # If we can see a "message" field, return it as partial content
+                    if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
+                        return ChatCompletionChunk(
+                            id=chunk.id,
+                            object=chunk.object,
+                            created=chunk.created.timestamp(),
+                            model=chunk.model,
+                            choices=[
+                                Choice(
+                                    index=choice.index,
+                                    delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
+                                    finish_reason=None,
+                                )
+                            ],
+                        )
 
         # If there's a finish reason, pass that along
         if choice.finish_reason is not None:
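A standalone illustration (plain Python, not letta code) of the heuristic above: the streamed send_message arguments arrive as an incomplete JSON prefix, and as soon as an optimistic parse yields a second top-level key, the message kwarg is known to be complete:

# Successive best-effort parses of the accumulating tool-call arguments.
parsed_snapshots = [
    {"message": "Hi"},                                    # still streaming the message text
    {"message": "Hi there!"},                             # still streaming
    {"message": "Hi there!", "request_heartbeat": True},  # a second key appeared -> done
]

for parsed_args in parsed_snapshots:
    if len(parsed_args.keys()) > 1:
        print("send_message text finished; emit an empty delta with finish_reason='stop'")
    else:
        print("stream delta:", parsed_args["message"])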
letta/server/rest_api/interface.py
CHANGED
@@ -317,6 +317,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.debug = False
         self.timeout = 10 * 60  # 10 minute timeout
 
+        # for expect_reasoning_content, we should accumulate `content`
+        self.expect_reasoning_content_buffer = None
+
     def _reset_inner_thoughts_json_reader(self):
         # A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
         self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=self.inner_thoughts_kwarg, wait_for_first_key=True)
@@ -387,6 +390,39 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # Wipe the inner thoughts buffers
         self._reset_inner_thoughts_json_reader()
 
+        # If we were in reasoning mode and accumulated a json block, attempt to release it as chunks
+        # if self.expect_reasoning_content_buffer is not None:
+        #     try:
+        #         # NOTE: this is hardcoded for our DeepSeek API integration
+        #         json_reasoning_content = json.loads(self.expect_reasoning_content_buffer)
+
+        #         if "name" in json_reasoning_content:
+        #             self._push_to_buffer(
+        #                 ToolCallMessage(
+        #                     id=message_id,
+        #                     date=message_date,
+        #                     tool_call=ToolCallDelta(
+        #                         name=json_reasoning_content["name"],
+        #                         arguments=None,
+        #                         tool_call_id=None,
+        #                     ),
+        #                 )
+        #             )
+        #         if "arguments" in json_reasoning_content:
+        #             self._push_to_buffer(
+        #                 ToolCallMessage(
+        #                     id=message_id,
+        #                     date=message_date,
+        #                     tool_call=ToolCallDelta(
+        #                         name=None,
+        #                         arguments=json_reasoning_content["arguments"],
+        #                         tool_call_id=None,
+        #                     ),
+        #                 )
+        #             )
+        #     except Exception as e:
+        #         print(f"Failed to interpret reasoning content ({self.expect_reasoning_content_buffer}) as JSON: {e}")
+
     def step_complete(self):
         """Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
         if not self.multi_step:
@@ -410,7 +446,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             return
 
     def _process_chunk_to_letta_style(
-        self,
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        # if we expect `reasoning_content``, then that's what gets mapped to ReasoningMessage
+        # and `content` needs to be handled outside the interface
+        expect_reasoning_content: bool = False,
     ) -> Optional[Union[ReasoningMessage, ToolCallMessage, AssistantMessage]]:
         """
         Example data from non-streaming response looks like:
@@ -426,6 +468,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         if (
             message_delta.content is None
+            and (expect_reasoning_content and message_delta.reasoning_content is None)
             and message_delta.tool_calls is None
             and message_delta.function_call is None
             and choice.finish_reason is None
@@ -435,17 +478,68 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             return None
 
         # inner thoughts
-        if message_delta.
-
-
-
+        if expect_reasoning_content and message_delta.reasoning_content is not None:
+            processed_chunk = ReasoningMessage(
+                id=message_id,
+                date=message_date,
+                reasoning=message_delta.reasoning_content,
+            )
+        elif expect_reasoning_content and message_delta.content is not None:
+            # "ignore" content if we expect reasoning content
+            if self.expect_reasoning_content_buffer is None:
+                self.expect_reasoning_content_buffer = message_delta.content
             else:
-
+                self.expect_reasoning_content_buffer += message_delta.content
+
+            # we expect this to be pure JSON
+            # OptimisticJSONParser
+
+            # If we can pull a name out, pull it
+
+            try:
+                # NOTE: this is hardcoded for our DeepSeek API integration
+                json_reasoning_content = json.loads(self.expect_reasoning_content_buffer)
+                print(f"json_reasoning_content: {json_reasoning_content}")
+
+                processed_chunk = ToolCallMessage(
                     id=message_id,
                     date=message_date,
-
+                    tool_call=ToolCallDelta(
+                        name=json_reasoning_content.get("name"),
+                        arguments=json.dumps(json_reasoning_content.get("arguments")),
+                        tool_call_id=None,
+                    ),
                 )
 
+            except json.JSONDecodeError as e:
+                print(f"Failed to interpret reasoning content ({self.expect_reasoning_content_buffer}) as JSON: {e}")
+
+                return None
+            # Else,
+            # return None
+            # processed_chunk = ToolCallMessage(
+            #     id=message_id,
+            #     date=message_date,
+            #     tool_call=ToolCallDelta(
+            #         # name=tool_call_delta.get("name"),
+            #         name=None,
+            #         arguments=message_delta.content,
+            #         # tool_call_id=tool_call_delta.get("id"),
+            #         tool_call_id=None,
+            #     ),
+            # )
+            # return processed_chunk
+
+            # TODO eventually output as tool call outputs?
+            # print(f"Hiding content delta stream: '{message_delta.content}'")
+            # return None
+        elif message_delta.content is not None:
+            processed_chunk = ReasoningMessage(
+                id=message_id,
+                date=message_date,
+                reasoning=message_delta.content,
+            )
+
         # tool calls
         elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
             tool_call = message_delta.tool_calls[0]
@@ -890,7 +984,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         return processed_chunk
 
-    def process_chunk(
+    def process_chunk(
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        expect_reasoning_content: bool = False,
+    ):
         """Process a streaming chunk from an OpenAI-compatible server.
 
         Example data from non-streaming response looks like:
@@ -910,7 +1010,12 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # processed_chunk = self._process_chunk_to_openai_style(chunk)
             raise NotImplementedError("OpenAI proxy streaming temporarily disabled")
         else:
-            processed_chunk = self._process_chunk_to_letta_style(
+            processed_chunk = self._process_chunk_to_letta_style(
+                chunk=chunk,
+                message_id=message_id,
+                message_date=message_date,
+                expect_reasoning_content=expect_reasoning_content,
+            )
             if processed_chunk is None:
                 return
 
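A standalone sketch (not letta code) of the DeepSeek-specific branch above: reasoning_content deltas stream straight through as reasoning, while content deltas are accumulated until the buffer parses as a complete JSON tool call:

import json

# Simulated `content` deltas from a DeepSeek-style stream; the tool call arrives as plain JSON text.
deltas = ['{"name": "send_message", ', '"arguments": {"message": "Hello!"}}']

buffer = None
for delta in deltas:
    buffer = delta if buffer is None else buffer + delta
    try:
        call = json.loads(buffer)
    except json.JSONDecodeError:
        continue  # keep accumulating until the JSON is complete
    print("tool call:", call.get("name"), json.dumps(call.get("arguments")))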