letta-nightly 0.13.0.dev20251031104146__py3-none-any.whl → 0.13.1.dev20251031234110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only, and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

Files changed (101)
  1. letta/__init__.py +1 -1
  2. letta/adapters/simple_llm_stream_adapter.py +1 -0
  3. letta/agents/letta_agent_v2.py +8 -0
  4. letta/agents/letta_agent_v3.py +120 -27
  5. letta/agents/temporal/activities/__init__.py +25 -0
  6. letta/agents/temporal/activities/create_messages.py +26 -0
  7. letta/agents/temporal/activities/create_step.py +57 -0
  8. letta/agents/temporal/activities/example_activity.py +9 -0
  9. letta/agents/temporal/activities/execute_tool.py +130 -0
  10. letta/agents/temporal/activities/llm_request.py +114 -0
  11. letta/agents/temporal/activities/prepare_messages.py +27 -0
  12. letta/agents/temporal/activities/refresh_context.py +160 -0
  13. letta/agents/temporal/activities/summarize_conversation_history.py +77 -0
  14. letta/agents/temporal/activities/update_message_ids.py +25 -0
  15. letta/agents/temporal/activities/update_run.py +43 -0
  16. letta/agents/temporal/constants.py +59 -0
  17. letta/agents/temporal/temporal_agent_workflow.py +704 -0
  18. letta/agents/temporal/types.py +275 -0
  19. letta/constants.py +8 -0
  20. letta/errors.py +4 -0
  21. letta/functions/function_sets/base.py +0 -11
  22. letta/groups/helpers.py +7 -1
  23. letta/groups/sleeptime_multi_agent_v4.py +4 -3
  24. letta/interfaces/anthropic_streaming_interface.py +0 -1
  25. letta/interfaces/openai_streaming_interface.py +103 -100
  26. letta/llm_api/anthropic_client.py +57 -12
  27. letta/llm_api/bedrock_client.py +1 -0
  28. letta/llm_api/deepseek_client.py +3 -2
  29. letta/llm_api/google_vertex_client.py +1 -0
  30. letta/llm_api/groq_client.py +1 -0
  31. letta/llm_api/llm_client_base.py +15 -1
  32. letta/llm_api/openai.py +2 -2
  33. letta/llm_api/openai_client.py +17 -3
  34. letta/llm_api/xai_client.py +1 -0
  35. letta/orm/organization.py +4 -0
  36. letta/orm/sqlalchemy_base.py +7 -0
  37. letta/otel/tracing.py +131 -4
  38. letta/schemas/agent_file.py +10 -10
  39. letta/schemas/block.py +22 -3
  40. letta/schemas/enums.py +21 -0
  41. letta/schemas/environment_variables.py +3 -2
  42. letta/schemas/group.py +3 -3
  43. letta/schemas/letta_response.py +36 -4
  44. letta/schemas/llm_batch_job.py +3 -3
  45. letta/schemas/llm_config.py +27 -3
  46. letta/schemas/mcp.py +3 -2
  47. letta/schemas/mcp_server.py +3 -2
  48. letta/schemas/message.py +167 -49
  49. letta/schemas/organization.py +2 -1
  50. letta/schemas/passage.py +2 -1
  51. letta/schemas/provider_trace.py +2 -1
  52. letta/schemas/providers/openrouter.py +1 -2
  53. letta/schemas/run_metrics.py +2 -1
  54. letta/schemas/sandbox_config.py +3 -1
  55. letta/schemas/step_metrics.py +2 -1
  56. letta/schemas/tool_rule.py +2 -2
  57. letta/schemas/user.py +2 -1
  58. letta/server/rest_api/app.py +5 -1
  59. letta/server/rest_api/routers/v1/__init__.py +4 -0
  60. letta/server/rest_api/routers/v1/agents.py +71 -9
  61. letta/server/rest_api/routers/v1/blocks.py +7 -7
  62. letta/server/rest_api/routers/v1/groups.py +40 -0
  63. letta/server/rest_api/routers/v1/identities.py +2 -2
  64. letta/server/rest_api/routers/v1/internal_agents.py +31 -0
  65. letta/server/rest_api/routers/v1/internal_blocks.py +177 -0
  66. letta/server/rest_api/routers/v1/internal_runs.py +25 -1
  67. letta/server/rest_api/routers/v1/runs.py +2 -22
  68. letta/server/rest_api/routers/v1/tools.py +10 -0
  69. letta/server/server.py +5 -2
  70. letta/services/agent_manager.py +4 -4
  71. letta/services/archive_manager.py +16 -0
  72. letta/services/group_manager.py +44 -0
  73. letta/services/helpers/run_manager_helper.py +2 -2
  74. letta/services/lettuce/lettuce_client.py +148 -0
  75. letta/services/mcp/base_client.py +9 -3
  76. letta/services/run_manager.py +148 -37
  77. letta/services/source_manager.py +91 -3
  78. letta/services/step_manager.py +2 -3
  79. letta/services/streaming_service.py +52 -13
  80. letta/services/summarizer/summarizer.py +28 -2
  81. letta/services/tool_executor/builtin_tool_executor.py +1 -1
  82. letta/services/tool_executor/core_tool_executor.py +2 -117
  83. letta/services/tool_schema_generator.py +2 -2
  84. letta/validators.py +21 -0
  85. {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/METADATA +1 -1
  86. {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/RECORD +89 -84
  87. letta/agent.py +0 -1758
  88. letta/cli/cli_load.py +0 -16
  89. letta/client/__init__.py +0 -0
  90. letta/client/streaming.py +0 -95
  91. letta/client/utils.py +0 -78
  92. letta/functions/async_composio_toolset.py +0 -109
  93. letta/functions/composio_helpers.py +0 -96
  94. letta/helpers/composio_helpers.py +0 -38
  95. letta/orm/job_messages.py +0 -33
  96. letta/schemas/providers.py +0 -1617
  97. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -132
  98. letta/services/tool_executor/composio_tool_executor.py +0 -57
  99. {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/WHEEL +0 -0
  100. {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/entry_points.txt +0 -0
  101. {letta_nightly-0.13.0.dev20251031104146.dist-info → letta_nightly-0.13.1.dev20251031234110.dist-info}/licenses/LICENSE +0 -0
@@ -14,7 +14,7 @@ from letta.orm.run_metrics import RunMetrics as RunMetricsModel
14
14
  from letta.orm.sqlalchemy_base import AccessType
15
15
  from letta.orm.step import Step as StepModel
16
16
  from letta.otel.tracing import log_event, trace_method
17
- from letta.schemas.enums import AgentType, ComparisonOperator, MessageRole, RunStatus, PrimitiveType
17
+ from letta.schemas.enums import AgentType, ComparisonOperator, MessageRole, PrimitiveType, RunStatus
18
18
  from letta.schemas.job import LettaRequestConfig
19
19
  from letta.schemas.letta_message import LettaMessage, LettaMessageUnion
20
20
  from letta.schemas.letta_response import LettaResponse
@@ -63,14 +63,16 @@ class RunManager:
63
63
 
64
64
  run = RunModel(**run_data)
65
65
  run.organization_id = organization_id
66
- run = await run.create_async(session, actor=actor, no_commit=True, no_refresh=True)
67
-
68
- # Create run metrics with start timestamp
69
- import time
70
66
 
71
67
  # Get the project_id from the agent
72
68
  agent = await session.get(AgentModel, agent_id)
73
69
  project_id = agent.project_id if agent else None
70
+ run.project_id = project_id
71
+
72
+ run = await run.create_async(session, actor=actor, no_commit=True, no_refresh=True)
73
+
74
+ # Create run metrics with start timestamp
75
+ import time
74
76
 
75
77
  metrics = RunMetricsModel(
76
78
  id=run.id,
@@ -95,6 +97,34 @@ class RunManager:
95
97
  raise NoResultFound(f"Run with id {run_id} not found")
96
98
  return run.to_pydantic()
97
99
 
100
+ @enforce_types
101
+ async def get_run_with_status(self, run_id: str, actor: PydanticUser) -> PydanticRun:
102
+ """Get a run by its ID and update status from Lettuce if applicable."""
103
+ run = await self.get_run_by_id(run_id=run_id, actor=actor)
104
+
105
+ use_lettuce = run.metadata and run.metadata.get("lettuce")
106
+ if use_lettuce and run.status not in [RunStatus.completed, RunStatus.failed, RunStatus.cancelled]:
107
+ try:
108
+ from letta.services.lettuce import LettuceClient
109
+
110
+ lettuce_client = await LettuceClient.create()
111
+ status = await lettuce_client.get_status(run_id=run_id)
112
+
113
+ # Map the status to our enum
114
+ if status == "RUNNING":
115
+ run.status = RunStatus.running
116
+ elif status == "COMPLETED":
117
+ run.status = RunStatus.completed
118
+ elif status == "FAILED":
119
+ run.status = RunStatus.failed
120
+ elif status == "CANCELLED":
121
+ run.status = RunStatus.cancelled
122
+ except Exception as e:
123
+ logger.error(f"Failed to get status from Lettuce for run {run_id}: {str(e)}")
124
+ # Return run with current status from DB if Lettuce fails
125
+
126
+ return run
127
+
98
128
  @enforce_types
99
129
  async def list_runs(
100
130
  self,
@@ -112,12 +142,27 @@ class RunManager:
112
142
  step_count: Optional[int] = None,
113
143
  step_count_operator: ComparisonOperator = ComparisonOperator.EQ,
114
144
  tools_used: Optional[List[str]] = None,
145
+ project_id: Optional[str] = None,
146
+ order_by: Literal["created_at", "duration"] = "created_at",
147
+ duration_percentile: Optional[int] = None,
148
+ duration_filter: Optional[dict] = None,
149
+ start_date: Optional[datetime] = None,
150
+ end_date: Optional[datetime] = None,
115
151
  ) -> List[PydanticRun]:
116
152
  """List runs with filtering options."""
117
153
  async with db_registry.async_session() as session:
118
- from sqlalchemy import or_, select
154
+ from sqlalchemy import func, or_, select
119
155
 
120
- query = select(RunModel).filter(RunModel.organization_id == actor.organization_id)
156
+ # Always join with run_metrics to get duration data
157
+ query = (
158
+ select(RunModel, RunMetricsModel.run_ns)
159
+ .outerjoin(RunMetricsModel, RunModel.id == RunMetricsModel.id)
160
+ .filter(RunModel.organization_id == actor.organization_id)
161
+ )
162
+
163
+ # Filter by project_id if provided
164
+ if project_id:
165
+ query = query.filter(RunModel.project_id == project_id)
121
166
 
122
167
  # Handle agent filtering
123
168
  if agent_id:
@@ -141,41 +186,107 @@ class RunManager:
141
186
  if template_family:
142
187
  query = query.filter(RunModel.base_template_id == template_family)
143
188
 
144
- # Filter by step_count and/or tools_used - join with run_metrics
145
- if step_count is not None or tools_used:
146
- query = query.join(RunMetricsModel, RunModel.id == RunMetricsModel.id)
147
-
148
- # Filter by step_count with the specified operator
149
- if step_count is not None:
150
- if step_count_operator == ComparisonOperator.EQ:
151
- query = query.filter(RunMetricsModel.num_steps == step_count)
152
- elif step_count_operator == ComparisonOperator.GTE:
153
- query = query.filter(RunMetricsModel.num_steps >= step_count)
154
- elif step_count_operator == ComparisonOperator.LTE:
155
- query = query.filter(RunMetricsModel.num_steps <= step_count)
156
-
157
- # Filter by tools used ids
158
- if tools_used:
159
- from sqlalchemy import String, cast as sa_cast, type_coerce
160
- from sqlalchemy.dialects.postgresql import ARRAY, JSONB
161
-
162
- # Use ?| operator to check if any tool_id exists in the array (OR logic)
163
- jsonb_tools = sa_cast(RunMetricsModel.tools_used, JSONB)
164
- tools_array = type_coerce(tools_used, ARRAY(String))
165
- query = query.filter(jsonb_tools.op("?|")(tools_array))
166
-
167
- # Apply pagination
168
- from letta.services.helpers.run_manager_helper import _apply_pagination_async
189
+ # Filter by date range
190
+ if start_date:
191
+ query = query.filter(RunModel.created_at >= start_date)
192
+ if end_date:
193
+ query = query.filter(RunModel.created_at <= end_date)
194
+
195
+ # Filter by step_count with the specified operator
196
+ if step_count is not None:
197
+ if step_count_operator == ComparisonOperator.EQ:
198
+ query = query.filter(RunMetricsModel.num_steps == step_count)
199
+ elif step_count_operator == ComparisonOperator.GTE:
200
+ query = query.filter(RunMetricsModel.num_steps >= step_count)
201
+ elif step_count_operator == ComparisonOperator.LTE:
202
+ query = query.filter(RunMetricsModel.num_steps <= step_count)
203
+
204
+ # Filter by tools used ids
205
+ if tools_used:
206
+ from sqlalchemy import String, cast as sa_cast, type_coerce
207
+ from sqlalchemy.dialects.postgresql import ARRAY, JSONB
208
+
209
+ # Use ?| operator to check if any tool_id exists in the array (OR logic)
210
+ jsonb_tools = sa_cast(RunMetricsModel.tools_used, JSONB)
211
+ tools_array = type_coerce(tools_used, ARRAY(String))
212
+ query = query.filter(jsonb_tools.op("?|")(tools_array))
213
+
214
+ # Ensure run_ns is not null when working with duration
215
+ if order_by == "duration" or duration_percentile is not None or duration_filter is not None:
216
+ query = query.filter(RunMetricsModel.run_ns.isnot(None))
217
+
218
+ # Apply duration filter if requested
219
+ if duration_filter is not None:
220
+ duration_value = duration_filter.get("value") if isinstance(duration_filter, dict) else duration_filter.value
221
+ duration_operator = duration_filter.get("operator") if isinstance(duration_filter, dict) else duration_filter.operator
222
+
223
+ if duration_operator == "gt":
224
+ query = query.filter(RunMetricsModel.run_ns > duration_value)
225
+ elif duration_operator == "lt":
226
+ query = query.filter(RunMetricsModel.run_ns < duration_value)
227
+ elif duration_operator == "eq":
228
+ query = query.filter(RunMetricsModel.run_ns == duration_value)
229
+
230
+ # Apply duration percentile filter if requested
231
+ if duration_percentile is not None:
232
+ # Calculate the percentile threshold
233
+ percentile_query = (
234
+ select(func.percentile_cont(duration_percentile / 100.0).within_group(RunMetricsModel.run_ns))
235
+ .select_from(RunMetricsModel)
236
+ .join(RunModel, RunModel.id == RunMetricsModel.id)
237
+ .filter(RunModel.organization_id == actor.organization_id)
238
+ .filter(RunMetricsModel.run_ns.isnot(None))
239
+ )
169
240
 
170
- query = await _apply_pagination_async(query, before, after, session, ascending=ascending)
241
+ # Apply same filters to percentile calculation
242
+ if project_id:
243
+ percentile_query = percentile_query.filter(RunModel.project_id == project_id)
244
+ if agent_ids:
245
+ percentile_query = percentile_query.filter(RunModel.agent_id.in_(agent_ids))
246
+ if statuses:
247
+ percentile_query = percentile_query.filter(RunModel.status.in_(statuses))
248
+
249
+ # Execute percentile query
250
+ percentile_result = await session.execute(percentile_query)
251
+ percentile_threshold = percentile_result.scalar()
252
+
253
+ # Filter by percentile threshold (runs slower than the percentile)
254
+ if percentile_threshold is not None:
255
+ query = query.filter(RunMetricsModel.run_ns >= percentile_threshold)
256
+
257
+ # Apply sorting based on order_by
258
+ if order_by == "duration":
259
+ # Sort by duration
260
+ if ascending:
261
+ query = query.order_by(RunMetricsModel.run_ns.asc())
262
+ else:
263
+ query = query.order_by(RunMetricsModel.run_ns.desc())
264
+ else:
265
+ # Apply pagination for created_at ordering
266
+ from letta.services.helpers.run_manager_helper import _apply_pagination_async
267
+
268
+ query = await _apply_pagination_async(query, before, after, session, ascending=ascending)
171
269
 
172
270
  # Apply limit
173
271
  if limit:
174
272
  query = query.limit(limit)
175
273
 
176
274
  result = await session.execute(query)
177
- runs = result.scalars().all()
178
- return [run.to_pydantic() for run in runs]
275
+ rows = result.all()
276
+
277
+ # Populate total_duration_ns from run_metrics.run_ns
278
+ pydantic_runs = []
279
+ for row in rows:
280
+ run_model = row[0]
281
+ run_ns = row[1]
282
+
283
+ pydantic_run = run_model.to_pydantic()
284
+ if run_ns is not None:
285
+ pydantic_run.total_duration_ns = run_ns
286
+
287
+ pydantic_runs.append(pydantic_run)
288
+
289
+ return pydantic_runs
179
290
 
180
291
  @enforce_types
181
292
  @raise_on_invalid_id(param_name="run_id", expected_prefix=PrimitiveType.RUN)
@@ -323,8 +434,8 @@ class RunManager:
323
434
  logger.error(error_message)
324
435
  result["callback_error"] = error_message
325
436
  # Continue silently - callback failures should not affect run completion
326
- finally:
327
- return result
437
+
438
+ return result
328
439
 
329
440
  @enforce_types
330
441
  @raise_on_invalid_id(param_name="run_id", expected_prefix=PrimitiveType.RUN)
@@ -326,13 +326,25 @@ class SourceManager:
326
326
  @enforce_types
327
327
  @trace_method
328
328
  @raise_on_invalid_id(param_name="source_id", expected_prefix=PrimitiveType.SOURCE)
329
- async def get_agents_for_source_id(self, source_id: str, actor: PydanticUser) -> List[str]:
329
+ async def get_agents_for_source_id(
330
+ self,
331
+ source_id: str,
332
+ actor: PydanticUser,
333
+ before: Optional[str] = None,
334
+ after: Optional[str] = None,
335
+ limit: Optional[int] = 50,
336
+ ascending: bool = True,
337
+ ) -> List[str]:
330
338
  """
331
339
  Get all agent IDs associated with a given source ID.
332
340
 
333
341
  Args:
334
342
  source_id: ID of the source to find agents for
335
343
  actor: User performing the action
344
+ before: Agent ID cursor for pagination (upper bound)
345
+ after: Agent ID cursor for pagination (lower bound)
346
+ limit: Maximum number of agent IDs to return
347
+ ascending: Sort direction by creation time
336
348
 
337
349
  Returns:
338
350
  List[str]: List of agent IDs that have this source attached
@@ -341,8 +353,84 @@ class SourceManager:
341
353
  # Verify source exists and user has permission to access it
342
354
  await self._validate_source_exists_async(session, source_id, actor)
343
355
 
344
- # Query the junction table directly for performance
345
- query = select(SourcesAgents.agent_id).where(SourcesAgents.source_id == source_id)
356
+ # Get reference objects for pagination
357
+ before_obj = None
358
+ after_obj = None
359
+
360
+ if before:
361
+ before_obj = await session.get(AgentModel, before)
362
+ if not before_obj:
363
+ from letta.orm.errors import NoResultFound
364
+
365
+ raise NoResultFound(f"No Agent found with id {before}")
366
+
367
+ if after:
368
+ after_obj = await session.get(AgentModel, after)
369
+ if not after_obj:
370
+ from letta.orm.errors import NoResultFound
371
+
372
+ raise NoResultFound(f"No Agent found with id {after}")
373
+
374
+ # Build query with join to AgentModel for ordering and pagination
375
+ query = (
376
+ select(AgentModel.id)
377
+ .join(SourcesAgents, AgentModel.id == SourcesAgents.agent_id)
378
+ .where(
379
+ SourcesAgents.source_id == source_id,
380
+ AgentModel.organization_id == actor.organization_id,
381
+ AgentModel.is_deleted == False,
382
+ )
383
+ )
384
+
385
+ # Apply pagination conditions
386
+ if before_obj or after_obj:
387
+ from sqlalchemy import and_, or_
388
+
389
+ conditions = []
390
+
391
+ if before_obj and after_obj:
392
+ # Window-based query
393
+ conditions.append(
394
+ or_(
395
+ AgentModel.created_at < before_obj.created_at,
396
+ and_(AgentModel.created_at == before_obj.created_at, AgentModel.id < before_obj.id),
397
+ )
398
+ )
399
+ conditions.append(
400
+ or_(
401
+ AgentModel.created_at > after_obj.created_at,
402
+ and_(AgentModel.created_at == after_obj.created_at, AgentModel.id > after_obj.id),
403
+ )
404
+ )
405
+ else:
406
+ if before_obj:
407
+ conditions.append(
408
+ or_(
409
+ AgentModel.created_at < before_obj.created_at
410
+ if ascending
411
+ else AgentModel.created_at > before_obj.created_at,
412
+ and_(AgentModel.created_at == before_obj.created_at, AgentModel.id < before_obj.id),
413
+ )
414
+ )
415
+ if after_obj:
416
+ conditions.append(
417
+ or_(
418
+ AgentModel.created_at > after_obj.created_at if ascending else AgentModel.created_at < after_obj.created_at,
419
+ and_(AgentModel.created_at == after_obj.created_at, AgentModel.id > after_obj.id),
420
+ )
421
+ )
422
+
423
+ if conditions:
424
+ query = query.where(and_(*conditions))
425
+
426
+ # Apply ordering
427
+ if ascending:
428
+ query = query.order_by(AgentModel.created_at.asc(), AgentModel.id.asc())
429
+ else:
430
+ query = query.order_by(AgentModel.created_at.desc(), AgentModel.id.desc())
431
+
432
+ # Apply limit
433
+ query = query.limit(limit)
346
434
 
347
435
  result = await session.execute(query)
348
436
  agent_ids = result.scalars().all()
@@ -278,15 +278,14 @@ class StepManager:
278
278
  ascending: bool = False,
279
279
  ) -> List[PydanticMessage]:
280
280
  async with db_registry.async_session() as session:
281
- messages = MessageModel.list(
281
+ messages = await MessageModel.list_async(
282
282
  db_session=session,
283
283
  before=before,
284
284
  after=after,
285
285
  ascending=ascending,
286
286
  limit=limit,
287
287
  actor=actor,
288
- join_model=StepModel,
289
- join_conditions=[MessageModel.step.id == step_id],
288
+ step_id=step_id,
290
289
  )
291
290
  return [message.to_pydantic() for message in messages]
292
291
 
@@ -31,6 +31,7 @@ from letta.schemas.letta_message import AssistantMessage, MessageType
31
31
  from letta.schemas.letta_message_content import TextContent
32
32
  from letta.schemas.letta_request import LettaStreamingRequest
33
33
  from letta.schemas.letta_response import LettaResponse
34
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
34
35
  from letta.schemas.message import MessageCreate
35
36
  from letta.schemas.run import Run as PydanticRun, RunUpdate
36
37
  from letta.schemas.usage import LettaUsageStatistics
@@ -273,6 +274,11 @@ class StreamingService:
273
274
 
274
275
  async def error_aware_stream():
275
276
  """Stream that handles early LLM errors gracefully in streaming format."""
277
+ run_status = None
278
+ run_update_metadata = None
279
+ stop_reason = None
280
+ error_data = None
281
+
276
282
  try:
277
283
  stream = agent_loop.stream(
278
284
  input_messages=messages,
@@ -287,23 +293,23 @@ class StreamingService:
287
293
  async for chunk in stream:
288
294
  yield chunk
289
295
 
290
- # update run status after completion
291
- if run_id and self.runs_manager:
292
- if agent_loop.stop_reason.stop_reason.value == "cancelled":
293
- run_status = RunStatus.cancelled
294
- else:
295
- run_status = RunStatus.completed
296
-
297
- await self.runs_manager.update_run_by_id_async(
298
- run_id=run_id,
299
- update=RunUpdate(status=run_status, stop_reason=agent_loop.stop_reason.stop_reason.value),
300
- actor=actor,
301
- )
296
+ # set run status after successful completion
297
+ if agent_loop.stop_reason.stop_reason.value == "cancelled":
298
+ run_status = RunStatus.cancelled
299
+ else:
300
+ run_status = RunStatus.completed
301
+ stop_reason = agent_loop.stop_reason.stop_reason.value
302
302
 
303
303
  except LLMTimeoutError as e:
304
+ run_status = RunStatus.failed
304
305
  error_data = {"error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": str(e)}}
306
+ stop_reason = StopReasonType.llm_api_error
307
+ logger.error(f"Run {run_id} stopped with LLM timeout error: {e}, error_data: {error_data}")
305
308
  yield (f"data: {json.dumps(error_data)}\n\n", 504)
309
+ # Send [DONE] marker to properly close the stream
310
+ yield "data: [DONE]\n\n"
306
311
  except LLMRateLimitError as e:
312
+ run_status = RunStatus.failed
307
313
  error_data = {
308
314
  "error": {
309
315
  "type": "llm_rate_limit",
@@ -311,8 +317,13 @@ class StreamingService:
311
317
  "detail": str(e),
312
318
  }
313
319
  }
320
+ stop_reason = StopReasonType.llm_api_error
321
+ logger.warning(f"Run {run_id} stopped with LLM rate limit error: {e}, error_data: {error_data}")
314
322
  yield (f"data: {json.dumps(error_data)}\n\n", 429)
323
+ # Send [DONE] marker to properly close the stream
324
+ yield "data: [DONE]\n\n"
315
325
  except LLMAuthenticationError as e:
326
+ run_status = RunStatus.failed
316
327
  error_data = {
317
328
  "error": {
318
329
  "type": "llm_authentication",
@@ -320,13 +331,41 @@ class StreamingService:
320
331
  "detail": str(e),
321
332
  }
322
333
  }
334
+ logger.warning(f"Run {run_id} stopped with LLM authentication error: {e}, error_data: {error_data}")
335
+ stop_reason = StopReasonType.llm_api_error
323
336
  yield (f"data: {json.dumps(error_data)}\n\n", 401)
337
+ # Send [DONE] marker to properly close the stream
338
+ yield "data: [DONE]\n\n"
324
339
  except LLMError as e:
340
+ run_status = RunStatus.failed
325
341
  error_data = {"error": {"type": "llm_error", "message": "An error occurred with the LLM request.", "detail": str(e)}}
342
+ logger.error(f"Run {run_id} stopped with LLM error: {e}, error_data: {error_data}")
326
343
  yield (f"data: {json.dumps(error_data)}\n\n", 502)
344
+ # Send [DONE] marker to properly close the stream
345
+ stop_reason = StopReasonType.llm_api_error
346
+ yield "data: [DONE]\n\n"
327
347
  except Exception as e:
328
- error_data = {"error": {"type": "internal_error", "message": "An internal server error occurred.", "detail": str(e)}}
348
+ run_status = RunStatus.failed
349
+ error_data = {
350
+ "error": {
351
+ "type": "internal_error",
352
+ "message": "An unknown error occurred with the LLM streaming request.",
353
+ "detail": str(e),
354
+ }
355
+ }
356
+ logger.error(f"Run {run_id} stopped with unknown error: {e}, error_data: {error_data}")
357
+ stop_reason = StopReasonType.error
329
358
  yield (f"data: {json.dumps(error_data)}\n\n", 500)
359
+ # Re-raise to ensure proper error handling and Sentry capture
360
+ raise
361
+ finally:
362
+ # always update run status, whether success or failure
363
+ if run_id and self.runs_manager and run_status:
364
+ await self.runs_manager.update_run_by_id_async(
365
+ run_id=run_id,
366
+ update=RunUpdate(status=run_status, stop_reason=stop_reason, metadata=error_data),
367
+ actor=actor,
368
+ )
330
369
 
331
370
  return error_aware_stream()
332
371
 
@@ -4,7 +4,13 @@ import traceback
4
4
  from typing import List, Optional, Tuple, Union
5
5
 
6
6
  from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
7
- from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, MESSAGE_SUMMARY_REQUEST_ACK
7
+ from letta.constants import (
8
+ DEFAULT_MESSAGE_TOOL,
9
+ DEFAULT_MESSAGE_TOOL_KWARG,
10
+ MESSAGE_SUMMARY_REQUEST_ACK,
11
+ TOOL_RETURN_TRUNCATION_CHARS,
12
+ )
13
+ from letta.errors import ContextWindowExceededError
8
14
  from letta.helpers.message_helper import convert_message_creates_to_messages
9
15
  from letta.llm_api.llm_client import LLMClient
10
16
  from letta.log import get_logger
@@ -394,7 +400,27 @@ async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor:
394
400
  response_data = await llm_client.request_async(request_data, summarizer_llm_config)
395
401
  except Exception as e:
396
402
  # handle LLM error (likely a context window exceeded error)
397
- raise llm_client.handle_llm_error(e)
403
+ try:
404
+ raise llm_client.handle_llm_error(e)
405
+ except ContextWindowExceededError as context_error:
406
+ logger.warning(
407
+ f"Context window exceeded during summarization, falling back to truncated tool returns. Original error: {context_error}"
408
+ )
409
+
410
+ # Fallback: rebuild request with truncated tool returns
411
+ request_data = llm_client.build_request_data(
412
+ AgentType.letta_v1_agent,
413
+ input_messages_obj,
414
+ summarizer_llm_config,
415
+ tools=[],
416
+ tool_return_truncation_chars=TOOL_RETURN_TRUNCATION_CHARS,
417
+ )
418
+
419
+ try:
420
+ response_data = await llm_client.request_async(request_data, summarizer_llm_config)
421
+ except Exception as fallback_error:
422
+ logger.error(f"Fallback summarization also failed: {fallback_error}")
423
+ raise llm_client.handle_llm_error(fallback_error)
398
424
  response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, summarizer_llm_config)
399
425
  if response.choices[0].message.content is None:
400
426
  logger.warning("No content returned from summarizer")
@@ -189,7 +189,7 @@ class LettaBuiltinToolExecutor(ToolExecutor):
189
189
  return json.dumps(response, indent=2, ensure_ascii=False)
190
190
 
191
191
  except Exception as e:
192
- logger.error(f"Exa search failed for query '{query}': {str(e)}")
192
+ logger.info(f"Exa search failed for query '{query}': {str(e)}")
193
193
  return json.dumps({"query": query, "error": f"Search failed: {str(e)}"})
194
194
 
195
195
  async def fetch_webpage(self, agent_state: "AgentState", url: str) -> str: