letta-nightly 0.12.1.dev20251024104217__py3-none-any.whl → 0.13.0.dev20251025104015__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +2 -3
- letta/adapters/letta_llm_adapter.py +1 -0
- letta/adapters/simple_llm_request_adapter.py +8 -5
- letta/adapters/simple_llm_stream_adapter.py +22 -6
- letta/agents/agent_loop.py +10 -3
- letta/agents/base_agent.py +4 -1
- letta/agents/helpers.py +41 -9
- letta/agents/letta_agent.py +11 -10
- letta/agents/letta_agent_v2.py +47 -37
- letta/agents/letta_agent_v3.py +395 -300
- letta/agents/voice_agent.py +8 -6
- letta/agents/voice_sleeptime_agent.py +3 -3
- letta/constants.py +30 -7
- letta/errors.py +20 -0
- letta/functions/function_sets/base.py +55 -3
- letta/functions/mcp_client/types.py +33 -57
- letta/functions/schema_generator.py +135 -23
- letta/groups/sleeptime_multi_agent_v3.py +6 -11
- letta/groups/sleeptime_multi_agent_v4.py +227 -0
- letta/helpers/converters.py +78 -4
- letta/helpers/crypto_utils.py +6 -2
- letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py +9 -11
- letta/interfaces/anthropic_streaming_interface.py +3 -4
- letta/interfaces/gemini_streaming_interface.py +4 -6
- letta/interfaces/openai_streaming_interface.py +63 -28
- letta/llm_api/anthropic_client.py +7 -4
- letta/llm_api/deepseek_client.py +6 -4
- letta/llm_api/google_ai_client.py +3 -12
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/helpers.py +90 -61
- letta/llm_api/llm_api_tools.py +4 -1
- letta/llm_api/openai.py +12 -12
- letta/llm_api/openai_client.py +53 -16
- letta/local_llm/constants.py +4 -3
- letta/local_llm/json_parser.py +5 -2
- letta/local_llm/utils.py +2 -3
- letta/log.py +171 -7
- letta/orm/agent.py +43 -9
- letta/orm/archive.py +4 -0
- letta/orm/custom_columns.py +15 -0
- letta/orm/identity.py +11 -11
- letta/orm/mcp_server.py +9 -0
- letta/orm/message.py +6 -1
- letta/orm/run_metrics.py +7 -2
- letta/orm/sqlalchemy_base.py +2 -2
- letta/orm/tool.py +3 -0
- letta/otel/tracing.py +2 -0
- letta/prompts/prompt_generator.py +7 -2
- letta/schemas/agent.py +41 -10
- letta/schemas/agent_file.py +3 -0
- letta/schemas/archive.py +4 -2
- letta/schemas/block.py +2 -1
- letta/schemas/enums.py +36 -3
- letta/schemas/file.py +3 -3
- letta/schemas/folder.py +2 -1
- letta/schemas/group.py +2 -1
- letta/schemas/identity.py +18 -9
- letta/schemas/job.py +3 -1
- letta/schemas/letta_message.py +71 -12
- letta/schemas/letta_request.py +7 -3
- letta/schemas/letta_stop_reason.py +0 -25
- letta/schemas/llm_config.py +8 -2
- letta/schemas/mcp.py +80 -83
- letta/schemas/mcp_server.py +349 -0
- letta/schemas/memory.py +20 -8
- letta/schemas/message.py +212 -67
- letta/schemas/providers/anthropic.py +13 -6
- letta/schemas/providers/azure.py +6 -4
- letta/schemas/providers/base.py +8 -4
- letta/schemas/providers/bedrock.py +6 -2
- letta/schemas/providers/cerebras.py +7 -3
- letta/schemas/providers/deepseek.py +2 -1
- letta/schemas/providers/google_gemini.py +15 -6
- letta/schemas/providers/groq.py +2 -1
- letta/schemas/providers/lmstudio.py +9 -6
- letta/schemas/providers/mistral.py +2 -1
- letta/schemas/providers/openai.py +7 -2
- letta/schemas/providers/together.py +9 -3
- letta/schemas/providers/xai.py +7 -3
- letta/schemas/run.py +7 -2
- letta/schemas/run_metrics.py +2 -1
- letta/schemas/sandbox_config.py +2 -2
- letta/schemas/secret.py +3 -158
- letta/schemas/source.py +2 -2
- letta/schemas/step.py +2 -2
- letta/schemas/tool.py +24 -1
- letta/schemas/usage.py +0 -1
- letta/server/rest_api/app.py +123 -7
- letta/server/rest_api/dependencies.py +3 -0
- letta/server/rest_api/interface.py +7 -4
- letta/server/rest_api/redis_stream_manager.py +16 -1
- letta/server/rest_api/routers/v1/__init__.py +7 -0
- letta/server/rest_api/routers/v1/agents.py +332 -322
- letta/server/rest_api/routers/v1/archives.py +127 -40
- letta/server/rest_api/routers/v1/blocks.py +54 -6
- letta/server/rest_api/routers/v1/chat_completions.py +146 -0
- letta/server/rest_api/routers/v1/folders.py +27 -35
- letta/server/rest_api/routers/v1/groups.py +23 -35
- letta/server/rest_api/routers/v1/identities.py +24 -10
- letta/server/rest_api/routers/v1/internal_runs.py +107 -0
- letta/server/rest_api/routers/v1/internal_templates.py +162 -179
- letta/server/rest_api/routers/v1/jobs.py +15 -27
- letta/server/rest_api/routers/v1/mcp_servers.py +309 -0
- letta/server/rest_api/routers/v1/messages.py +23 -34
- letta/server/rest_api/routers/v1/organizations.py +6 -27
- letta/server/rest_api/routers/v1/providers.py +35 -62
- letta/server/rest_api/routers/v1/runs.py +30 -43
- letta/server/rest_api/routers/v1/sandbox_configs.py +6 -4
- letta/server/rest_api/routers/v1/sources.py +26 -42
- letta/server/rest_api/routers/v1/steps.py +16 -29
- letta/server/rest_api/routers/v1/tools.py +17 -13
- letta/server/rest_api/routers/v1/users.py +5 -17
- letta/server/rest_api/routers/v1/voice.py +18 -27
- letta/server/rest_api/streaming_response.py +5 -2
- letta/server/rest_api/utils.py +187 -25
- letta/server/server.py +27 -22
- letta/server/ws_api/server.py +5 -4
- letta/services/agent_manager.py +148 -26
- letta/services/agent_serialization_manager.py +6 -1
- letta/services/archive_manager.py +168 -15
- letta/services/block_manager.py +14 -4
- letta/services/file_manager.py +33 -29
- letta/services/group_manager.py +10 -0
- letta/services/helpers/agent_manager_helper.py +65 -11
- letta/services/identity_manager.py +105 -4
- letta/services/job_manager.py +11 -1
- letta/services/mcp/base_client.py +2 -2
- letta/services/mcp/oauth_utils.py +33 -8
- letta/services/mcp_manager.py +174 -78
- letta/services/mcp_server_manager.py +1331 -0
- letta/services/message_manager.py +109 -4
- letta/services/organization_manager.py +4 -4
- letta/services/passage_manager.py +9 -25
- letta/services/provider_manager.py +91 -15
- letta/services/run_manager.py +72 -15
- letta/services/sandbox_config_manager.py +45 -3
- letta/services/source_manager.py +15 -8
- letta/services/step_manager.py +24 -1
- letta/services/streaming_service.py +581 -0
- letta/services/summarizer/summarizer.py +1 -1
- letta/services/tool_executor/core_tool_executor.py +111 -0
- letta/services/tool_executor/files_tool_executor.py +5 -3
- letta/services/tool_executor/sandbox_tool_executor.py +2 -2
- letta/services/tool_executor/tool_execution_manager.py +1 -1
- letta/services/tool_manager.py +10 -3
- letta/services/tool_sandbox/base.py +61 -1
- letta/services/tool_sandbox/local_sandbox.py +1 -3
- letta/services/user_manager.py +2 -2
- letta/settings.py +49 -5
- letta/system.py +14 -5
- letta/utils.py +73 -1
- letta/validators.py +105 -0
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/METADATA +4 -2
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/RECORD +157 -151
- letta/schemas/letta_ping.py +0 -28
- letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/WHEEL +0 -0
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/licenses/LICENSE +0 -0
letta/services/streaming_service.py (new file)
@@ -0,0 +1,581 @@
import json
import time
from typing import AsyncIterator, Optional, Union
from uuid import uuid4

from fastapi.responses import StreamingResponse
from openai.types.chat import ChatCompletionChunk
from openai.types.chat.chat_completion_chunk import Choice, ChoiceDelta

from letta.agents.agent_loop import AgentLoop
from letta.agents.base_agent_v2 import BaseAgentV2
from letta.constants import REDIS_RUN_ID_PREFIX
from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
from letta.errors import (
    LettaInvalidArgumentError,
    LettaServiceUnavailableError,
    LLMAuthenticationError,
    LLMError,
    LLMRateLimitError,
    LLMTimeoutError,
    PendingApprovalError,
)
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
from letta.log import get_logger
from letta.otel.context import get_ctx_attributes
from letta.otel.metric_registry import MetricRegistry
from letta.schemas.agent import AgentState
from letta.schemas.enums import AgentType, MessageStreamStatus, RunStatus
from letta.schemas.job import LettaRequestConfig
from letta.schemas.letta_message import AssistantMessage, MessageType
from letta.schemas.letta_message_content import TextContent
from letta.schemas.letta_request import LettaStreamingRequest
from letta.schemas.letta_response import LettaResponse
from letta.schemas.message import MessageCreate
from letta.schemas.run import Run as PydanticRun, RunUpdate
from letta.schemas.usage import LettaUsageStatistics
from letta.schemas.user import User
from letta.server.rest_api.redis_stream_manager import create_background_stream_processor, redis_sse_stream_generator
from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
from letta.services.run_manager import RunManager
from letta.settings import settings
from letta.utils import safe_create_task

logger = get_logger(__name__)


class StreamingService:
    """
    Service for managing agent streaming responses.
    Handles run creation, stream generation, error handling, and format conversion.
    """

    def __init__(self, server):
        """
        Initialize the streaming service.

        Args:
            server: The SyncServer instance for accessing managers and services
        """
        self.server = server
        self.runs_manager = RunManager() if settings.track_agent_run else None

    async def create_agent_stream(
        self,
        agent_id: str,
        actor: User,
        request: LettaStreamingRequest,
        run_type: str = "streaming",
    ) -> tuple[Optional[PydanticRun], Union[StreamingResponse, LettaResponse]]:
        """
        Create a streaming response for an agent.

        Args:
            agent_id: The agent ID to stream from
            actor: The user making the request
            request: The LettaStreamingRequest containing all request parameters
            run_type: Type of run for tracking

        Returns:
            Tuple of (run object or None, streaming response)
        """
        request_start_timestamp_ns = get_utc_timestamp_ns()
        MetricRegistry().user_message_counter.add(1, get_ctx_attributes())

        # get redis client
        redis_client = await get_redis_client()

        # load agent and check eligibility
        agent = await self.server.agent_manager.get_agent_by_id_async(
            agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
        )

        agent_eligible = self._is_agent_eligible(agent)
        model_compatible = self._is_model_compatible(agent)
        model_compatible_token_streaming = self._is_token_streaming_compatible(agent)

        # create run if tracking is enabled
        run = None
        run_update_metadata = None
        if settings.track_agent_run:
            run = await self._create_run(agent_id, request, run_type, actor)
        await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)

        try:
            if agent_eligible and model_compatible:
                # use agent loop for streaming
                agent_loop = AgentLoop.load(agent_state=agent, actor=actor)

                # create the base stream with error handling
                raw_stream = self._create_error_aware_stream(
                    agent_loop=agent_loop,
                    messages=request.messages,
                    max_steps=request.max_steps,
                    stream_tokens=request.stream_tokens and model_compatible_token_streaming,
                    run_id=run.id if run else None,
                    use_assistant_message=request.use_assistant_message,
                    request_start_timestamp_ns=request_start_timestamp_ns,
                    include_return_message_types=request.include_return_message_types,
                    actor=actor,
                )

                # handle background streaming if requested
                if request.background and settings.track_agent_run:
                    if isinstance(redis_client, NoopAsyncRedisClient):
                        raise LettaServiceUnavailableError(
                            f"Background streaming requires Redis to be running. "
                            f"Please ensure Redis is properly configured. "
                            f"LETTA_REDIS_HOST: {settings.redis_host}, LETTA_REDIS_PORT: {settings.redis_port}",
                            service_name="redis",
                        )

                    safe_create_task(
                        create_background_stream_processor(
                            stream_generator=raw_stream,
                            redis_client=redis_client,
                            run_id=run.id,
                            run_manager=self.server.run_manager,
                            actor=actor,
                        ),
                        label=f"background_stream_processor_{run.id}",
                    )

                    raw_stream = redis_sse_stream_generator(
                        redis_client=redis_client,
                        run_id=run.id,
                    )

                # conditionally wrap with keepalive based on request parameter
                if request.include_pings and settings.enable_keepalive:
                    stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval, run_id=run.id)
                else:
                    stream = raw_stream

                result = StreamingResponseWithStatusCode(
                    stream,
                    media_type="text/event-stream",
                )
            else:
                # fallback to non-agent-loop streaming
                result = await self.server.send_message_to_agent(
                    agent_id=agent_id,
                    actor=actor,
                    input_messages=request.messages,
                    stream_steps=True,
                    stream_tokens=request.stream_tokens,
                    use_assistant_message=request.use_assistant_message,
                    assistant_message_tool_name=request.assistant_message_tool_name,
                    assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
                    request_start_timestamp_ns=request_start_timestamp_ns,
                    include_return_message_types=request.include_return_message_types,
                )

            # update run status to running before returning
            if settings.track_agent_run and run:
                run_status = RunStatus.running

            return run, result

        except PendingApprovalError as e:
            if settings.track_agent_run:
                run_update_metadata = {"error": str(e)}
                run_status = RunStatus.failed
            raise
        except Exception as e:
            if settings.track_agent_run:
                run_update_metadata = {"error": str(e)}
                run_status = RunStatus.failed
            raise
        finally:
            if settings.track_agent_run and run:
                await self.server.run_manager.update_run_by_id_async(
                    run_id=run.id,
                    update=RunUpdate(status=run_status, metadata=run_update_metadata),
                    actor=actor,
                )

    async def create_agent_stream_openai_chat_completions(
        self,
        agent_id: str,
        actor: User,
        request: LettaStreamingRequest,
    ) -> StreamingResponse:
        """
        Create OpenAI-compatible chat completions streaming response.

        Transforms Letta's internal streaming format to match OpenAI's
        ChatCompletionChunk schema, filtering out internal tool execution
        and only streaming assistant text responses.

        Args:
            agent_id: The agent ID to stream from
            actor: The user making the request
            request: The LettaStreamingRequest containing all request parameters

        Returns:
            StreamingResponse with OpenAI-formatted SSE chunks
        """
        # load agent to get model info for the completion chunks
        agent = await self.server.agent_manager.get_agent_by_id_async(agent_id, actor)

        # create standard Letta stream (returns SSE-formatted stream)
        run, letta_stream_response = await self.create_agent_stream(
            agent_id=agent_id,
            actor=actor,
            request=request,
            run_type="openai_chat_completions",
        )

        # extract the stream iterator from the response
        if isinstance(letta_stream_response, StreamingResponseWithStatusCode):
            letta_stream = letta_stream_response.body_iterator
        else:
            raise LettaInvalidArgumentError(
                "Agent is not compatible with streaming mode",
                argument_name="model",
            )

        # create transformer with agent's model info
        model_name = agent.llm_config.model if agent.llm_config else "unknown"
        completion_id = f"chatcmpl-{run.id if run else str(uuid4())}"

        transformer = OpenAIChatCompletionsStreamTransformer(
            model=model_name,
            completion_id=completion_id,
        )

        # transform Letta SSE stream to OpenAI format (parser handles SSE strings)
        openai_stream = transformer.transform_stream(letta_stream)

        return StreamingResponse(
            openai_stream,
            media_type="text/event-stream",
        )

    def _create_error_aware_stream(
        self,
        agent_loop: BaseAgentV2,
        messages: list[MessageCreate],
        max_steps: int,
        stream_tokens: bool,
        run_id: Optional[str],
        use_assistant_message: bool,
        request_start_timestamp_ns: int,
        include_return_message_types: Optional[list[MessageType]],
        actor: User,
    ) -> AsyncIterator:
        """
        Create a stream with unified error handling.

        Returns:
            Async iterator that yields chunks with proper error handling
        """

        async def error_aware_stream():
            """Stream that handles early LLM errors gracefully in streaming format."""
            try:
                stream = agent_loop.stream(
                    input_messages=messages,
                    max_steps=max_steps,
                    stream_tokens=stream_tokens,
                    run_id=run_id,
                    use_assistant_message=use_assistant_message,
                    request_start_timestamp_ns=request_start_timestamp_ns,
                    include_return_message_types=include_return_message_types,
                )

                async for chunk in stream:
                    yield chunk

                # update run status after completion
                if run_id and self.runs_manager:
                    if agent_loop.stop_reason.stop_reason.value == "cancelled":
                        run_status = RunStatus.cancelled
                    else:
                        run_status = RunStatus.completed

                    await self.runs_manager.update_run_by_id_async(
                        run_id=run_id,
                        update=RunUpdate(status=run_status, stop_reason=agent_loop.stop_reason.stop_reason.value),
                        actor=actor,
                    )

            except LLMTimeoutError as e:
                error_data = {"error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": str(e)}}
                yield (f"data: {json.dumps(error_data)}\n\n", 504)
            except LLMRateLimitError as e:
                error_data = {
                    "error": {
                        "type": "llm_rate_limit",
                        "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
                        "detail": str(e),
                    }
                }
                yield (f"data: {json.dumps(error_data)}\n\n", 429)
            except LLMAuthenticationError as e:
                error_data = {
                    "error": {
                        "type": "llm_authentication",
                        "message": "Authentication failed with the LLM model provider.",
                        "detail": str(e),
                    }
                }
                yield (f"data: {json.dumps(error_data)}\n\n", 401)
            except LLMError as e:
                error_data = {"error": {"type": "llm_error", "message": "An error occurred with the LLM request.", "detail": str(e)}}
                yield (f"data: {json.dumps(error_data)}\n\n", 502)
            except Exception as e:
                error_data = {"error": {"type": "internal_error", "message": "An internal server error occurred.", "detail": str(e)}}
                yield (f"data: {json.dumps(error_data)}\n\n", 500)

        return error_aware_stream()

    def _is_agent_eligible(self, agent: AgentState) -> bool:
        """Check if agent is eligible for streaming."""
        return agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]

    def _is_model_compatible(self, agent: AgentState) -> bool:
        """Check if agent's model is compatible with streaming."""
        return agent.llm_config.model_endpoint_type in [
            "anthropic",
            "openai",
            "together",
            "google_ai",
            "google_vertex",
            "bedrock",
            "ollama",
            "azure",
            "xai",
            "groq",
            "deepseek",
        ]

    def _is_token_streaming_compatible(self, agent: AgentState) -> bool:
        """Check if agent's model supports token-level streaming."""
        base_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek"]
        google_letta_v1 = agent.agent_type == AgentType.letta_v1_agent and agent.llm_config.model_endpoint_type in [
            "google_ai",
            "google_vertex",
        ]
        return base_compatible or google_letta_v1

    async def _create_run(self, agent_id: str, request: LettaStreamingRequest, run_type: str, actor: User) -> PydanticRun:
        """Create a run for tracking execution."""
        run = await self.runs_manager.create_run(
            pydantic_run=PydanticRun(
                agent_id=agent_id,
                background=request.background or False,
                metadata={
                    "run_type": run_type,
                },
                request_config=LettaRequestConfig.from_letta_request(request),
            ),
            actor=actor,
        )
        return run

    async def _update_run_status(
        self,
        run_id: str,
        status: RunStatus,
        actor: User,
        error: Optional[str] = None,
        stop_reason: Optional[str] = None,
    ):
        """Update the status of a run."""
        if not self.runs_manager:
            return

        update = RunUpdate(status=status)
        if error:
            update.metadata = {"error": error}
        if stop_reason:
            update.stop_reason = stop_reason

        await self.runs_manager.update_run_by_id_async(
            run_id=run_id,
            update=update,
            actor=actor,
        )


class OpenAIChatCompletionsStreamTransformer:
    """
    Transforms Letta streaming messages into OpenAI ChatCompletionChunk format.
    Filters out internal tool execution and only streams assistant text responses.
    """

    def __init__(self, model: str, completion_id: str):
        """
        Initialize the transformer.

        Args:
            model: Model name to include in chunks
            completion_id: Unique ID for this completion (format: chatcmpl-{uuid})
        """
        self.model = model
        self.completion_id = completion_id
        self.first_chunk = True
        self.created = int(time.time())

    # TODO: This is lowkey really ugly and poor code design, but this works fine for now
    def _parse_sse_chunk(self, sse_string: str):
        """
        Parse SSE-formatted string back into a message object.

        Args:
            sse_string: SSE formatted string like "data: {...}\n\n"

        Returns:
            Parsed message object or None if can't parse
        """
        try:
            # strip SSE formatting
            if sse_string.startswith("data: "):
                json_str = sse_string[6:].strip()

                # handle [DONE] marker
                if json_str == "[DONE]":
                    return MessageStreamStatus.done

                # parse JSON
                data = json.loads(json_str)

                # reconstruct message object based on message_type
                message_type = data.get("message_type")

                if message_type == "assistant_message":
                    return AssistantMessage(**data)
                elif message_type == "usage_statistics":
                    return LettaUsageStatistics(**data)
                elif message_type == "stop_reason":
                    # skip stop_reason, we use [DONE] instead
                    return None
                else:
                    # other message types we skip
                    return None
            return None
        except Exception as e:
            logger.warning(f"Failed to parse SSE chunk: {e}")
            return None

    async def transform_stream(self, letta_stream: AsyncIterator) -> AsyncIterator[str]:
        """
        Transform Letta stream to OpenAI ChatCompletionChunk SSE format.

        Args:
            letta_stream: Async iterator of Letta messages (may be SSE strings or objects)

        Yields:
            SSE-formatted strings: "data: {json}\n\n"
        """
        try:
            async for raw_chunk in letta_stream:
                # parse SSE string if needed
                if isinstance(raw_chunk, str):
                    chunk = self._parse_sse_chunk(raw_chunk)
                    if chunk is None:
                        continue  # skip unparseable or filtered chunks
                else:
                    chunk = raw_chunk

                # only process assistant messages
                if isinstance(chunk, AssistantMessage):
                    async for sse_chunk in self._process_assistant_message(chunk):
                        print(f"CHUNK: {sse_chunk}")
                        yield sse_chunk

                # handle completion status
                elif chunk == MessageStreamStatus.done:
                    # emit final chunk with finish_reason
                    final_chunk = ChatCompletionChunk(
                        id=self.completion_id,
                        object="chat.completion.chunk",
                        created=self.created,
                        model=self.model,
                        choices=[
                            Choice(
                                index=0,
                                delta=ChoiceDelta(),
                                finish_reason="stop",
                            )
                        ],
                    )
                    yield f"data: {final_chunk.model_dump_json()}\n\n"
                    yield "data: [DONE]\n\n"

        except Exception as e:
            logger.error(f"Error in OpenAI stream transformation: {e}", exc_info=True)
            error_chunk = {"error": {"message": str(e), "type": "server_error"}}
            yield f"data: {json.dumps(error_chunk)}\n\n"

    async def _process_assistant_message(self, message: AssistantMessage) -> AsyncIterator[str]:
        """
        Convert AssistantMessage to OpenAI ChatCompletionChunk(s).

        Args:
            message: Letta AssistantMessage with content

        Yields:
            SSE-formatted chunk strings
        """
        # extract text from content (can be string or list of TextContent)
        text_content = self._extract_text_content(message.content)
        if not text_content:
            return

        # emit role on first chunk only
        if self.first_chunk:
            self.first_chunk = False
            # first chunk includes role
            chunk = ChatCompletionChunk(
                id=self.completion_id,
                object="chat.completion.chunk",
                created=self.created,
                model=self.model,
                choices=[
                    Choice(
                        index=0,
                        delta=ChoiceDelta(role="assistant", content=text_content),
                        finish_reason=None,
                    )
                ],
            )
        else:
            # subsequent chunks just have content
            chunk = ChatCompletionChunk(
                id=self.completion_id,
                object="chat.completion.chunk",
                created=self.created,
                model=self.model,
                choices=[
                    Choice(
                        index=0,
                        delta=ChoiceDelta(content=text_content),
                        finish_reason=None,
                    )
                ],
            )

        yield f"data: {chunk.model_dump_json()}\n\n"

    def _extract_text_content(self, content: Union[str, list[TextContent]]) -> str:
        """
        Extract text string from content field.

        Args:
            content: Either a string or list of TextContent objects

        Returns:
            Extracted text string
        """
        if isinstance(content, str):
            return content
        elif isinstance(content, list):
            # concatenate all TextContent items
            text_parts = []
            for item in content:
                if isinstance(item, TextContent):
                    text_parts.append(item.text)
            return "".join(text_parts)
        return ""
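For reference, the short sketch below reproduces the wire format that OpenAIChatCompletionsStreamTransformer emits for a first assistant delta, using only the openai types the new module already imports. It is a standalone illustration rather than part of the package; the completion id, model name, and text are placeholders.

import time
from uuid import uuid4

from openai.types.chat import ChatCompletionChunk
from openai.types.chat.chat_completion_chunk import Choice, ChoiceDelta

# Build the same shape of chunk that _process_assistant_message yields for the
# first delta (role plus text), then serialize it as an SSE "data:" line.
chunk = ChatCompletionChunk(
    id=f"chatcmpl-{uuid4()}",  # placeholder completion id
    object="chat.completion.chunk",
    created=int(time.time()),
    model="example-model",  # placeholder model name
    choices=[
        Choice(
            index=0,
            delta=ChoiceDelta(role="assistant", content="Hello"),
            finish_reason=None,
        )
    ],
)
print(f"data: {chunk.model_dump_json()}\n")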
letta/services/summarizer/summarizer.py
@@ -106,7 +106,7 @@ class Summarizer:
             try:
                 t.result()  # This re-raises exceptions from the task
             except Exception:
-                logger.
+                logger.exception("Background task failed")
 
         task.add_done_callback(callback)
         return task
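The one-line change above makes the background task's done-callback log failures via logger.exception. As a standalone illustration of that pattern (not letta's actual Summarizer code; fire_and_forget is a made-up helper name), the sketch below re-raises a task's exception with t.result() and records it with a full traceback.

import asyncio
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def fire_and_forget(coro):
    """Schedule a coroutine and log, rather than silently drop, any failure."""
    task = asyncio.ensure_future(coro)

    def callback(t):
        try:
            t.result()  # re-raises any exception the task raised
        except Exception:
            logger.exception("Background task failed")

    task.add_done_callback(callback)
    return task


async def main():
    async def boom():
        raise RuntimeError("simulated failure")

    fire_and_forget(boom())
    await asyncio.sleep(0.1)  # give the loop time to run the task and its callback


asyncio.run(main())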