letta-nightly 0.12.1.dev20251024104217__py3-none-any.whl → 0.13.0.dev20251025104015__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Potentially problematic release.

Files changed (159)
  1. letta/__init__.py +2 -3
  2. letta/adapters/letta_llm_adapter.py +1 -0
  3. letta/adapters/simple_llm_request_adapter.py +8 -5
  4. letta/adapters/simple_llm_stream_adapter.py +22 -6
  5. letta/agents/agent_loop.py +10 -3
  6. letta/agents/base_agent.py +4 -1
  7. letta/agents/helpers.py +41 -9
  8. letta/agents/letta_agent.py +11 -10
  9. letta/agents/letta_agent_v2.py +47 -37
  10. letta/agents/letta_agent_v3.py +395 -300
  11. letta/agents/voice_agent.py +8 -6
  12. letta/agents/voice_sleeptime_agent.py +3 -3
  13. letta/constants.py +30 -7
  14. letta/errors.py +20 -0
  15. letta/functions/function_sets/base.py +55 -3
  16. letta/functions/mcp_client/types.py +33 -57
  17. letta/functions/schema_generator.py +135 -23
  18. letta/groups/sleeptime_multi_agent_v3.py +6 -11
  19. letta/groups/sleeptime_multi_agent_v4.py +227 -0
  20. letta/helpers/converters.py +78 -4
  21. letta/helpers/crypto_utils.py +6 -2
  22. letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py +9 -11
  23. letta/interfaces/anthropic_streaming_interface.py +3 -4
  24. letta/interfaces/gemini_streaming_interface.py +4 -6
  25. letta/interfaces/openai_streaming_interface.py +63 -28
  26. letta/llm_api/anthropic_client.py +7 -4
  27. letta/llm_api/deepseek_client.py +6 -4
  28. letta/llm_api/google_ai_client.py +3 -12
  29. letta/llm_api/google_vertex_client.py +1 -1
  30. letta/llm_api/helpers.py +90 -61
  31. letta/llm_api/llm_api_tools.py +4 -1
  32. letta/llm_api/openai.py +12 -12
  33. letta/llm_api/openai_client.py +53 -16
  34. letta/local_llm/constants.py +4 -3
  35. letta/local_llm/json_parser.py +5 -2
  36. letta/local_llm/utils.py +2 -3
  37. letta/log.py +171 -7
  38. letta/orm/agent.py +43 -9
  39. letta/orm/archive.py +4 -0
  40. letta/orm/custom_columns.py +15 -0
  41. letta/orm/identity.py +11 -11
  42. letta/orm/mcp_server.py +9 -0
  43. letta/orm/message.py +6 -1
  44. letta/orm/run_metrics.py +7 -2
  45. letta/orm/sqlalchemy_base.py +2 -2
  46. letta/orm/tool.py +3 -0
  47. letta/otel/tracing.py +2 -0
  48. letta/prompts/prompt_generator.py +7 -2
  49. letta/schemas/agent.py +41 -10
  50. letta/schemas/agent_file.py +3 -0
  51. letta/schemas/archive.py +4 -2
  52. letta/schemas/block.py +2 -1
  53. letta/schemas/enums.py +36 -3
  54. letta/schemas/file.py +3 -3
  55. letta/schemas/folder.py +2 -1
  56. letta/schemas/group.py +2 -1
  57. letta/schemas/identity.py +18 -9
  58. letta/schemas/job.py +3 -1
  59. letta/schemas/letta_message.py +71 -12
  60. letta/schemas/letta_request.py +7 -3
  61. letta/schemas/letta_stop_reason.py +0 -25
  62. letta/schemas/llm_config.py +8 -2
  63. letta/schemas/mcp.py +80 -83
  64. letta/schemas/mcp_server.py +349 -0
  65. letta/schemas/memory.py +20 -8
  66. letta/schemas/message.py +212 -67
  67. letta/schemas/providers/anthropic.py +13 -6
  68. letta/schemas/providers/azure.py +6 -4
  69. letta/schemas/providers/base.py +8 -4
  70. letta/schemas/providers/bedrock.py +6 -2
  71. letta/schemas/providers/cerebras.py +7 -3
  72. letta/schemas/providers/deepseek.py +2 -1
  73. letta/schemas/providers/google_gemini.py +15 -6
  74. letta/schemas/providers/groq.py +2 -1
  75. letta/schemas/providers/lmstudio.py +9 -6
  76. letta/schemas/providers/mistral.py +2 -1
  77. letta/schemas/providers/openai.py +7 -2
  78. letta/schemas/providers/together.py +9 -3
  79. letta/schemas/providers/xai.py +7 -3
  80. letta/schemas/run.py +7 -2
  81. letta/schemas/run_metrics.py +2 -1
  82. letta/schemas/sandbox_config.py +2 -2
  83. letta/schemas/secret.py +3 -158
  84. letta/schemas/source.py +2 -2
  85. letta/schemas/step.py +2 -2
  86. letta/schemas/tool.py +24 -1
  87. letta/schemas/usage.py +0 -1
  88. letta/server/rest_api/app.py +123 -7
  89. letta/server/rest_api/dependencies.py +3 -0
  90. letta/server/rest_api/interface.py +7 -4
  91. letta/server/rest_api/redis_stream_manager.py +16 -1
  92. letta/server/rest_api/routers/v1/__init__.py +7 -0
  93. letta/server/rest_api/routers/v1/agents.py +332 -322
  94. letta/server/rest_api/routers/v1/archives.py +127 -40
  95. letta/server/rest_api/routers/v1/blocks.py +54 -6
  96. letta/server/rest_api/routers/v1/chat_completions.py +146 -0
  97. letta/server/rest_api/routers/v1/folders.py +27 -35
  98. letta/server/rest_api/routers/v1/groups.py +23 -35
  99. letta/server/rest_api/routers/v1/identities.py +24 -10
  100. letta/server/rest_api/routers/v1/internal_runs.py +107 -0
  101. letta/server/rest_api/routers/v1/internal_templates.py +162 -179
  102. letta/server/rest_api/routers/v1/jobs.py +15 -27
  103. letta/server/rest_api/routers/v1/mcp_servers.py +309 -0
  104. letta/server/rest_api/routers/v1/messages.py +23 -34
  105. letta/server/rest_api/routers/v1/organizations.py +6 -27
  106. letta/server/rest_api/routers/v1/providers.py +35 -62
  107. letta/server/rest_api/routers/v1/runs.py +30 -43
  108. letta/server/rest_api/routers/v1/sandbox_configs.py +6 -4
  109. letta/server/rest_api/routers/v1/sources.py +26 -42
  110. letta/server/rest_api/routers/v1/steps.py +16 -29
  111. letta/server/rest_api/routers/v1/tools.py +17 -13
  112. letta/server/rest_api/routers/v1/users.py +5 -17
  113. letta/server/rest_api/routers/v1/voice.py +18 -27
  114. letta/server/rest_api/streaming_response.py +5 -2
  115. letta/server/rest_api/utils.py +187 -25
  116. letta/server/server.py +27 -22
  117. letta/server/ws_api/server.py +5 -4
  118. letta/services/agent_manager.py +148 -26
  119. letta/services/agent_serialization_manager.py +6 -1
  120. letta/services/archive_manager.py +168 -15
  121. letta/services/block_manager.py +14 -4
  122. letta/services/file_manager.py +33 -29
  123. letta/services/group_manager.py +10 -0
  124. letta/services/helpers/agent_manager_helper.py +65 -11
  125. letta/services/identity_manager.py +105 -4
  126. letta/services/job_manager.py +11 -1
  127. letta/services/mcp/base_client.py +2 -2
  128. letta/services/mcp/oauth_utils.py +33 -8
  129. letta/services/mcp_manager.py +174 -78
  130. letta/services/mcp_server_manager.py +1331 -0
  131. letta/services/message_manager.py +109 -4
  132. letta/services/organization_manager.py +4 -4
  133. letta/services/passage_manager.py +9 -25
  134. letta/services/provider_manager.py +91 -15
  135. letta/services/run_manager.py +72 -15
  136. letta/services/sandbox_config_manager.py +45 -3
  137. letta/services/source_manager.py +15 -8
  138. letta/services/step_manager.py +24 -1
  139. letta/services/streaming_service.py +581 -0
  140. letta/services/summarizer/summarizer.py +1 -1
  141. letta/services/tool_executor/core_tool_executor.py +111 -0
  142. letta/services/tool_executor/files_tool_executor.py +5 -3
  143. letta/services/tool_executor/sandbox_tool_executor.py +2 -2
  144. letta/services/tool_executor/tool_execution_manager.py +1 -1
  145. letta/services/tool_manager.py +10 -3
  146. letta/services/tool_sandbox/base.py +61 -1
  147. letta/services/tool_sandbox/local_sandbox.py +1 -3
  148. letta/services/user_manager.py +2 -2
  149. letta/settings.py +49 -5
  150. letta/system.py +14 -5
  151. letta/utils.py +73 -1
  152. letta/validators.py +105 -0
  153. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/METADATA +4 -2
  154. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/RECORD +157 -151
  155. letta/schemas/letta_ping.py +0 -28
  156. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  157. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/WHEEL +0 -0
  158. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/entry_points.txt +0 -0
  159. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ b/letta/services/streaming_service.py
@@ -0,0 +1,581 @@
+ import json
+ import time
+ from typing import AsyncIterator, Optional, Union
+ from uuid import uuid4
+
+ from fastapi.responses import StreamingResponse
+ from openai.types.chat import ChatCompletionChunk
+ from openai.types.chat.chat_completion_chunk import Choice, ChoiceDelta
+
+ from letta.agents.agent_loop import AgentLoop
+ from letta.agents.base_agent_v2 import BaseAgentV2
+ from letta.constants import REDIS_RUN_ID_PREFIX
+ from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
+ from letta.errors import (
+     LettaInvalidArgumentError,
+     LettaServiceUnavailableError,
+     LLMAuthenticationError,
+     LLMError,
+     LLMRateLimitError,
+     LLMTimeoutError,
+     PendingApprovalError,
+ )
+ from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+ from letta.log import get_logger
+ from letta.otel.context import get_ctx_attributes
+ from letta.otel.metric_registry import MetricRegistry
+ from letta.schemas.agent import AgentState
+ from letta.schemas.enums import AgentType, MessageStreamStatus, RunStatus
+ from letta.schemas.job import LettaRequestConfig
+ from letta.schemas.letta_message import AssistantMessage, MessageType
+ from letta.schemas.letta_message_content import TextContent
+ from letta.schemas.letta_request import LettaStreamingRequest
+ from letta.schemas.letta_response import LettaResponse
+ from letta.schemas.message import MessageCreate
+ from letta.schemas.run import Run as PydanticRun, RunUpdate
+ from letta.schemas.usage import LettaUsageStatistics
+ from letta.schemas.user import User
+ from letta.server.rest_api.redis_stream_manager import create_background_stream_processor, redis_sse_stream_generator
+ from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
+ from letta.services.run_manager import RunManager
+ from letta.settings import settings
+ from letta.utils import safe_create_task
+
+ logger = get_logger(__name__)
+
+
+ class StreamingService:
+     """
+     Service for managing agent streaming responses.
+     Handles run creation, stream generation, error handling, and format conversion.
+     """
+
+     def __init__(self, server):
+         """
+         Initialize the streaming service.
+
+         Args:
+             server: The SyncServer instance for accessing managers and services
+         """
+         self.server = server
+         self.runs_manager = RunManager() if settings.track_agent_run else None
+
+     async def create_agent_stream(
+         self,
+         agent_id: str,
+         actor: User,
+         request: LettaStreamingRequest,
+         run_type: str = "streaming",
+     ) -> tuple[Optional[PydanticRun], Union[StreamingResponse, LettaResponse]]:
+         """
+         Create a streaming response for an agent.
+
+         Args:
+             agent_id: The agent ID to stream from
+             actor: The user making the request
+             request: The LettaStreamingRequest containing all request parameters
+             run_type: Type of run for tracking
+
+         Returns:
+             Tuple of (run object or None, streaming response)
+         """
+         request_start_timestamp_ns = get_utc_timestamp_ns()
+         MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
+
+         # get redis client
+         redis_client = await get_redis_client()
+
+         # load agent and check eligibility
+         agent = await self.server.agent_manager.get_agent_by_id_async(
+             agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
+         )
+
+         agent_eligible = self._is_agent_eligible(agent)
+         model_compatible = self._is_model_compatible(agent)
+         model_compatible_token_streaming = self._is_token_streaming_compatible(agent)
+
+         # create run if tracking is enabled
+         run = None
+         run_update_metadata = None
+         if settings.track_agent_run:
+             run = await self._create_run(agent_id, request, run_type, actor)
+             await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
+
+         try:
+             if agent_eligible and model_compatible:
+                 # use agent loop for streaming
+                 agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
+
+                 # create the base stream with error handling
+                 raw_stream = self._create_error_aware_stream(
+                     agent_loop=agent_loop,
+                     messages=request.messages,
+                     max_steps=request.max_steps,
+                     stream_tokens=request.stream_tokens and model_compatible_token_streaming,
+                     run_id=run.id if run else None,
+                     use_assistant_message=request.use_assistant_message,
+                     request_start_timestamp_ns=request_start_timestamp_ns,
+                     include_return_message_types=request.include_return_message_types,
+                     actor=actor,
+                 )
+
+                 # handle background streaming if requested
+                 if request.background and settings.track_agent_run:
+                     if isinstance(redis_client, NoopAsyncRedisClient):
+                         raise LettaServiceUnavailableError(
+                             f"Background streaming requires Redis to be running. "
+                             f"Please ensure Redis is properly configured. "
+                             f"LETTA_REDIS_HOST: {settings.redis_host}, LETTA_REDIS_PORT: {settings.redis_port}",
+                             service_name="redis",
+                         )
+
+                     safe_create_task(
+                         create_background_stream_processor(
+                             stream_generator=raw_stream,
+                             redis_client=redis_client,
+                             run_id=run.id,
+                             run_manager=self.server.run_manager,
+                             actor=actor,
+                         ),
+                         label=f"background_stream_processor_{run.id}",
+                     )
+
+                     raw_stream = redis_sse_stream_generator(
+                         redis_client=redis_client,
+                         run_id=run.id,
+                     )
+
+                 # conditionally wrap with keepalive based on request parameter
+                 if request.include_pings and settings.enable_keepalive:
+                     stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval, run_id=run.id)
+                 else:
+                     stream = raw_stream
+
+                 result = StreamingResponseWithStatusCode(
+                     stream,
+                     media_type="text/event-stream",
+                 )
+             else:
+                 # fallback to non-agent-loop streaming
+                 result = await self.server.send_message_to_agent(
+                     agent_id=agent_id,
+                     actor=actor,
+                     input_messages=request.messages,
+                     stream_steps=True,
+                     stream_tokens=request.stream_tokens,
+                     use_assistant_message=request.use_assistant_message,
+                     assistant_message_tool_name=request.assistant_message_tool_name,
+                     assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+                     request_start_timestamp_ns=request_start_timestamp_ns,
+                     include_return_message_types=request.include_return_message_types,
+                 )
+
+             # update run status to running before returning
+             if settings.track_agent_run and run:
+                 run_status = RunStatus.running
+
+             return run, result
+
+         except PendingApprovalError as e:
+             if settings.track_agent_run:
+                 run_update_metadata = {"error": str(e)}
+                 run_status = RunStatus.failed
+             raise
+         except Exception as e:
+             if settings.track_agent_run:
+                 run_update_metadata = {"error": str(e)}
+                 run_status = RunStatus.failed
+             raise
+         finally:
+             if settings.track_agent_run and run:
+                 await self.server.run_manager.update_run_by_id_async(
+                     run_id=run.id,
+                     update=RunUpdate(status=run_status, metadata=run_update_metadata),
+                     actor=actor,
+                 )
+
+     async def create_agent_stream_openai_chat_completions(
+         self,
+         agent_id: str,
+         actor: User,
+         request: LettaStreamingRequest,
+     ) -> StreamingResponse:
+         """
+         Create OpenAI-compatible chat completions streaming response.
+
+         Transforms Letta's internal streaming format to match OpenAI's
+         ChatCompletionChunk schema, filtering out internal tool execution
+         and only streaming assistant text responses.
+
+         Args:
+             agent_id: The agent ID to stream from
+             actor: The user making the request
+             request: The LettaStreamingRequest containing all request parameters
+
+         Returns:
+             StreamingResponse with OpenAI-formatted SSE chunks
+         """
+         # load agent to get model info for the completion chunks
+         agent = await self.server.agent_manager.get_agent_by_id_async(agent_id, actor)
+
+         # create standard Letta stream (returns SSE-formatted stream)
+         run, letta_stream_response = await self.create_agent_stream(
+             agent_id=agent_id,
+             actor=actor,
+             request=request,
+             run_type="openai_chat_completions",
+         )
+
+         # extract the stream iterator from the response
+         if isinstance(letta_stream_response, StreamingResponseWithStatusCode):
+             letta_stream = letta_stream_response.body_iterator
+         else:
+             raise LettaInvalidArgumentError(
+                 "Agent is not compatible with streaming mode",
+                 argument_name="model",
+             )
+
+         # create transformer with agent's model info
+         model_name = agent.llm_config.model if agent.llm_config else "unknown"
+         completion_id = f"chatcmpl-{run.id if run else str(uuid4())}"
+
+         transformer = OpenAIChatCompletionsStreamTransformer(
+             model=model_name,
+             completion_id=completion_id,
+         )
+
+         # transform Letta SSE stream to OpenAI format (parser handles SSE strings)
+         openai_stream = transformer.transform_stream(letta_stream)
+
+         return StreamingResponse(
+             openai_stream,
+             media_type="text/event-stream",
+         )
+
+     def _create_error_aware_stream(
+         self,
+         agent_loop: BaseAgentV2,
+         messages: list[MessageCreate],
+         max_steps: int,
+         stream_tokens: bool,
+         run_id: Optional[str],
+         use_assistant_message: bool,
+         request_start_timestamp_ns: int,
+         include_return_message_types: Optional[list[MessageType]],
+         actor: User,
+     ) -> AsyncIterator:
+         """
+         Create a stream with unified error handling.
+
+         Returns:
+             Async iterator that yields chunks with proper error handling
+         """
+
+         async def error_aware_stream():
+             """Stream that handles early LLM errors gracefully in streaming format."""
+             try:
+                 stream = agent_loop.stream(
+                     input_messages=messages,
+                     max_steps=max_steps,
+                     stream_tokens=stream_tokens,
+                     run_id=run_id,
+                     use_assistant_message=use_assistant_message,
+                     request_start_timestamp_ns=request_start_timestamp_ns,
+                     include_return_message_types=include_return_message_types,
+                 )
+
+                 async for chunk in stream:
+                     yield chunk
+
+                 # update run status after completion
+                 if run_id and self.runs_manager:
+                     if agent_loop.stop_reason.stop_reason.value == "cancelled":
+                         run_status = RunStatus.cancelled
+                     else:
+                         run_status = RunStatus.completed
+
+                     await self.runs_manager.update_run_by_id_async(
+                         run_id=run_id,
+                         update=RunUpdate(status=run_status, stop_reason=agent_loop.stop_reason.stop_reason.value),
+                         actor=actor,
+                     )
+
+             except LLMTimeoutError as e:
+                 error_data = {"error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": str(e)}}
+                 yield (f"data: {json.dumps(error_data)}\n\n", 504)
+             except LLMRateLimitError as e:
+                 error_data = {
+                     "error": {
+                         "type": "llm_rate_limit",
+                         "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                         "detail": str(e),
+                     }
+                 }
+                 yield (f"data: {json.dumps(error_data)}\n\n", 429)
+             except LLMAuthenticationError as e:
+                 error_data = {
+                     "error": {
+                         "type": "llm_authentication",
+                         "message": "Authentication failed with the LLM model provider.",
+                         "detail": str(e),
+                     }
+                 }
+                 yield (f"data: {json.dumps(error_data)}\n\n", 401)
+             except LLMError as e:
+                 error_data = {"error": {"type": "llm_error", "message": "An error occurred with the LLM request.", "detail": str(e)}}
+                 yield (f"data: {json.dumps(error_data)}\n\n", 502)
+             except Exception as e:
+                 error_data = {"error": {"type": "internal_error", "message": "An internal server error occurred.", "detail": str(e)}}
+                 yield (f"data: {json.dumps(error_data)}\n\n", 500)
+
+         return error_aware_stream()
+
+     def _is_agent_eligible(self, agent: AgentState) -> bool:
+         """Check if agent is eligible for streaming."""
+         return agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
+
+     def _is_model_compatible(self, agent: AgentState) -> bool:
+         """Check if agent's model is compatible with streaming."""
+         return agent.llm_config.model_endpoint_type in [
+             "anthropic",
+             "openai",
+             "together",
+             "google_ai",
+             "google_vertex",
+             "bedrock",
+             "ollama",
+             "azure",
+             "xai",
+             "groq",
+             "deepseek",
+         ]
+
+     def _is_token_streaming_compatible(self, agent: AgentState) -> bool:
+         """Check if agent's model supports token-level streaming."""
+         base_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek"]
+         google_letta_v1 = agent.agent_type == AgentType.letta_v1_agent and agent.llm_config.model_endpoint_type in [
+             "google_ai",
+             "google_vertex",
+         ]
+         return base_compatible or google_letta_v1
+
+     async def _create_run(self, agent_id: str, request: LettaStreamingRequest, run_type: str, actor: User) -> PydanticRun:
+         """Create a run for tracking execution."""
+         run = await self.runs_manager.create_run(
+             pydantic_run=PydanticRun(
+                 agent_id=agent_id,
+                 background=request.background or False,
+                 metadata={
+                     "run_type": run_type,
+                 },
+                 request_config=LettaRequestConfig.from_letta_request(request),
+             ),
+             actor=actor,
+         )
+         return run
+
+     async def _update_run_status(
+         self,
+         run_id: str,
+         status: RunStatus,
+         actor: User,
+         error: Optional[str] = None,
+         stop_reason: Optional[str] = None,
+     ):
+         """Update the status of a run."""
+         if not self.runs_manager:
+             return
+
+         update = RunUpdate(status=status)
+         if error:
+             update.metadata = {"error": error}
+         if stop_reason:
+             update.stop_reason = stop_reason
+
+         await self.runs_manager.update_run_by_id_async(
+             run_id=run_id,
+             update=update,
+             actor=actor,
+         )
+
+
+ class OpenAIChatCompletionsStreamTransformer:
+     """
+     Transforms Letta streaming messages into OpenAI ChatCompletionChunk format.
+     Filters out internal tool execution and only streams assistant text responses.
+     """
+
+     def __init__(self, model: str, completion_id: str):
+         """
+         Initialize the transformer.
+
+         Args:
+             model: Model name to include in chunks
+             completion_id: Unique ID for this completion (format: chatcmpl-{uuid})
+         """
+         self.model = model
+         self.completion_id = completion_id
+         self.first_chunk = True
+         self.created = int(time.time())
+
+     # TODO: This is lowkey really ugly and poor code design, but this works fine for now
+     def _parse_sse_chunk(self, sse_string: str):
+         """
+         Parse SSE-formatted string back into a message object.
+
+         Args:
+             sse_string: SSE formatted string like "data: {...}\n\n"
+
+         Returns:
+             Parsed message object or None if can't parse
+         """
+         try:
+             # strip SSE formatting
+             if sse_string.startswith("data: "):
+                 json_str = sse_string[6:].strip()
+
+                 # handle [DONE] marker
+                 if json_str == "[DONE]":
+                     return MessageStreamStatus.done
+
+                 # parse JSON
+                 data = json.loads(json_str)
+
+                 # reconstruct message object based on message_type
+                 message_type = data.get("message_type")
+
+                 if message_type == "assistant_message":
+                     return AssistantMessage(**data)
+                 elif message_type == "usage_statistics":
+                     return LettaUsageStatistics(**data)
+                 elif message_type == "stop_reason":
+                     # skip stop_reason, we use [DONE] instead
+                     return None
+                 else:
+                     # other message types we skip
+                     return None
+             return None
+         except Exception as e:
+             logger.warning(f"Failed to parse SSE chunk: {e}")
+             return None
+
+     async def transform_stream(self, letta_stream: AsyncIterator) -> AsyncIterator[str]:
+         """
+         Transform Letta stream to OpenAI ChatCompletionChunk SSE format.
+
+         Args:
+             letta_stream: Async iterator of Letta messages (may be SSE strings or objects)
+
+         Yields:
+             SSE-formatted strings: "data: {json}\n\n"
+         """
+         try:
+             async for raw_chunk in letta_stream:
+                 # parse SSE string if needed
+                 if isinstance(raw_chunk, str):
+                     chunk = self._parse_sse_chunk(raw_chunk)
+                     if chunk is None:
+                         continue # skip unparseable or filtered chunks
+                 else:
+                     chunk = raw_chunk
+
+                 # only process assistant messages
+                 if isinstance(chunk, AssistantMessage):
+                     async for sse_chunk in self._process_assistant_message(chunk):
+                         print(f"CHUNK: {sse_chunk}")
+                         yield sse_chunk
+
+                 # handle completion status
+                 elif chunk == MessageStreamStatus.done:
+                     # emit final chunk with finish_reason
+                     final_chunk = ChatCompletionChunk(
+                         id=self.completion_id,
+                         object="chat.completion.chunk",
+                         created=self.created,
+                         model=self.model,
+                         choices=[
+                             Choice(
+                                 index=0,
+                                 delta=ChoiceDelta(),
+                                 finish_reason="stop",
+                             )
+                         ],
+                     )
+                     yield f"data: {final_chunk.model_dump_json()}\n\n"
+                     yield "data: [DONE]\n\n"
+
+         except Exception as e:
+             logger.error(f"Error in OpenAI stream transformation: {e}", exc_info=True)
+             error_chunk = {"error": {"message": str(e), "type": "server_error"}}
+             yield f"data: {json.dumps(error_chunk)}\n\n"
+
+     async def _process_assistant_message(self, message: AssistantMessage) -> AsyncIterator[str]:
+         """
+         Convert AssistantMessage to OpenAI ChatCompletionChunk(s).
+
+         Args:
+             message: Letta AssistantMessage with content
+
+         Yields:
+             SSE-formatted chunk strings
+         """
+         # extract text from content (can be string or list of TextContent)
+         text_content = self._extract_text_content(message.content)
+         if not text_content:
+             return
+
+         # emit role on first chunk only
+         if self.first_chunk:
+             self.first_chunk = False
+             # first chunk includes role
+             chunk = ChatCompletionChunk(
+                 id=self.completion_id,
+                 object="chat.completion.chunk",
+                 created=self.created,
+                 model=self.model,
+                 choices=[
+                     Choice(
+                         index=0,
+                         delta=ChoiceDelta(role="assistant", content=text_content),
+                         finish_reason=None,
+                     )
+                 ],
+             )
+         else:
+             # subsequent chunks just have content
+             chunk = ChatCompletionChunk(
+                 id=self.completion_id,
+                 object="chat.completion.chunk",
+                 created=self.created,
+                 model=self.model,
+                 choices=[
+                     Choice(
+                         index=0,
+                         delta=ChoiceDelta(content=text_content),
+                         finish_reason=None,
+                     )
+                 ],
+             )
+
+         yield f"data: {chunk.model_dump_json()}\n\n"
+
+     def _extract_text_content(self, content: Union[str, list[TextContent]]) -> str:
+         """
+         Extract text string from content field.
+
+         Args:
+             content: Either a string or list of TextContent objects
+
+         Returns:
+             Extracted text string
+         """
+         if isinstance(content, str):
+             return content
+         elif isinstance(content, list):
+             # concatenate all TextContent items
+             text_parts = []
+             for item in content:
+                 if isinstance(item, TextContent):
+                     text_parts.append(item.text)
+             return "".join(text_parts)
+         return ""
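
The new create_agent_stream_openai_chat_completions path emits standard ChatCompletionChunk objects over SSE and terminates with a "data: [DONE]" marker, so any OpenAI-style SSE consumer should be able to read it; tool calls and other internal message types are filtered out, leaving only assistant text deltas and a final finish_reason="stop" chunk. A minimal consumer sketch in Python, assuming a hypothetical local route at /v1/{agent_id}/chat/completions (the actual path, port, auth, and request schema come from letta/server/rest_api/routers/v1/chat_completions.py and are not shown in this hunk):

import asyncio
import json

import httpx


async def consume_openai_stream(agent_id: str) -> str:
    """Accumulate assistant text from the OpenAI-compatible SSE stream."""
    url = f"http://localhost:8283/v1/{agent_id}/chat/completions"  # hypothetical endpoint
    payload = {"messages": [{"role": "user", "content": "hello"}], "stream": True}  # assumed schema
    text_parts = []
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=payload) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                data = line[len("data: "):]
                if data == "[DONE]":  # final marker emitted by the transformer
                    break
                chunk = json.loads(data)
                delta = chunk["choices"][0]["delta"]
                # role arrives on the first chunk only; content on every text chunk
                if delta.get("content"):
                    text_parts.append(delta["content"])
    return "".join(text_parts)


if __name__ == "__main__":
    print(asyncio.run(consume_openai_stream("agent-00000000")))  # hypothetical agent id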
--- a/letta/services/summarizer/summarizer.py
+++ b/letta/services/summarizer/summarizer.py
@@ -106,7 +106,7 @@ class Summarizer:
             try:
                 t.result() # This re-raises exceptions from the task
             except Exception:
-                logger.error("Background task failed: %s", traceback.format_exc())
+                logger.exception("Background task failed")

         task.add_done_callback(callback)
         return task
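
The summarizer hunk swaps manual traceback formatting for logger.exception, which logs at ERROR level and appends the active exception's traceback automatically (it is only meaningful inside an except block). A standalone illustration of the equivalence, using only the standard library:

import logging
import traceback

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

try:
    1 / 0
except Exception:
    # old style: interpolate the formatted traceback by hand
    logger.error("Background task failed: %s", traceback.format_exc())
    # new style: same ERROR level, traceback attached automatically via exc_info
    logger.exception("Background task failed")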