letta-nightly 0.11.4.dev20250825104222__py3-none-any.whl → 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +9 -3
  3. letta/agents/base_agent.py +2 -2
  4. letta/agents/letta_agent.py +56 -45
  5. letta/agents/voice_agent.py +2 -2
  6. letta/data_sources/redis_client.py +146 -1
  7. letta/errors.py +4 -0
  8. letta/functions/function_sets/files.py +2 -2
  9. letta/functions/mcp_client/types.py +30 -6
  10. letta/functions/schema_generator.py +46 -1
  11. letta/functions/schema_validator.py +17 -2
  12. letta/functions/types.py +1 -1
  13. letta/helpers/tool_execution_helper.py +0 -2
  14. letta/llm_api/anthropic_client.py +27 -5
  15. letta/llm_api/deepseek_client.py +97 -0
  16. letta/llm_api/groq_client.py +79 -0
  17. letta/llm_api/helpers.py +0 -1
  18. letta/llm_api/llm_api_tools.py +2 -113
  19. letta/llm_api/llm_client.py +21 -0
  20. letta/llm_api/llm_client_base.py +11 -9
  21. letta/llm_api/openai_client.py +3 -0
  22. letta/llm_api/xai_client.py +85 -0
  23. letta/prompts/prompt_generator.py +190 -0
  24. letta/schemas/agent_file.py +17 -2
  25. letta/schemas/file.py +24 -1
  26. letta/schemas/job.py +2 -0
  27. letta/schemas/letta_message.py +2 -0
  28. letta/schemas/letta_request.py +22 -0
  29. letta/schemas/message.py +10 -1
  30. letta/schemas/providers/bedrock.py +1 -0
  31. letta/server/rest_api/redis_stream_manager.py +300 -0
  32. letta/server/rest_api/routers/v1/agents.py +129 -7
  33. letta/server/rest_api/routers/v1/folders.py +15 -5
  34. letta/server/rest_api/routers/v1/runs.py +101 -11
  35. letta/server/rest_api/routers/v1/sources.py +21 -53
  36. letta/server/rest_api/routers/v1/telemetry.py +14 -4
  37. letta/server/rest_api/routers/v1/tools.py +2 -2
  38. letta/server/rest_api/streaming_response.py +3 -24
  39. letta/server/server.py +0 -1
  40. letta/services/agent_manager.py +2 -2
  41. letta/services/agent_serialization_manager.py +129 -32
  42. letta/services/file_manager.py +111 -6
  43. letta/services/file_processor/file_processor.py +5 -2
  44. letta/services/files_agents_manager.py +60 -0
  45. letta/services/helpers/agent_manager_helper.py +4 -205
  46. letta/services/helpers/tool_parser_helper.py +6 -3
  47. letta/services/mcp/base_client.py +7 -1
  48. letta/services/mcp/sse_client.py +7 -2
  49. letta/services/mcp/stdio_client.py +5 -0
  50. letta/services/mcp/streamable_http_client.py +11 -2
  51. letta/services/mcp_manager.py +31 -30
  52. letta/services/source_manager.py +26 -1
  53. letta/services/summarizer/summarizer.py +21 -10
  54. letta/services/tool_executor/files_tool_executor.py +13 -9
  55. letta/services/tool_executor/mcp_tool_executor.py +3 -0
  56. letta/services/tool_executor/tool_execution_manager.py +13 -0
  57. letta/services/tool_manager.py +43 -20
  58. letta/settings.py +1 -0
  59. letta/utils.py +37 -0
  60. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/METADATA +2 -2
  61. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/RECORD +64 -63
  62. letta/functions/mcp_client/__init__.py +0 -0
  63. letta/functions/mcp_client/base_client.py +0 -156
  64. letta/functions/mcp_client/sse_client.py +0 -51
  65. letta/functions/mcp_client/stdio_client.py +0 -109
  66. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/LICENSE +0 -0
  67. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/WHEEL +0 -0
  68. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/routers/v1/agents.py
@@ -14,7 +14,7 @@ from starlette.responses import Response, StreamingResponse
14
14
 
15
15
  from letta.agents.letta_agent import LettaAgent
16
16
  from letta.constants import AGENT_ID_PATTERN, DEFAULT_MAX_STEPS, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, REDIS_RUN_ID_PREFIX
17
- from letta.data_sources.redis_client import get_redis_client
17
+ from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
18
18
  from letta.errors import AgentExportIdMappingError, AgentExportProcessingError, AgentFileImportError, AgentNotFoundForExportError
19
19
  from letta.groups.sleeptime_multi_agent_v2 import SleeptimeMultiAgentV2
20
20
  from letta.helpers.datetime_helpers import get_utc_timestamp_ns
@@ -26,6 +26,7 @@ from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
26
26
  from letta.schemas.agent_file import AgentFileSchema
27
27
  from letta.schemas.block import Block, BlockUpdate
28
28
  from letta.schemas.enums import JobType
29
+ from letta.schemas.file import AgentFileAttachment, PaginatedAgentFiles
29
30
  from letta.schemas.group import Group
30
31
  from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
31
32
  from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion, MessageType
@@ -39,6 +40,7 @@ from letta.schemas.source import Source
39
40
  from letta.schemas.tool import Tool
40
41
  from letta.schemas.user import User
41
42
  from letta.serialize_schemas.pydantic_agent_schema import AgentSchema
43
+ from letta.server.rest_api.redis_stream_manager import create_background_stream_processor, redis_sse_stream_generator
42
44
  from letta.server.rest_api.utils import get_letta_server
43
45
  from letta.server.server import SyncServer
44
46
  from letta.services.summarizer.enums import SummarizationMode
@@ -249,6 +251,7 @@ async def import_agent(
249
251
  override_existing_tools: bool = True,
250
252
  project_id: str | None = None,
251
253
  strip_messages: bool = False,
254
+ env_vars: Optional[dict[str, Any]] = None,
252
255
  ) -> List[str]:
253
256
  """
254
257
  Import an agent using the new AgentFileSchema format.
@@ -259,7 +262,13 @@ async def import_agent(
259
262
  raise HTTPException(status_code=422, detail=f"Invalid agent file schema: {e!s}")
260
263
 
261
264
  try:
262
- import_result = await server.agent_serialization_manager.import_file(schema=agent_schema, actor=actor)
265
+ import_result = await server.agent_serialization_manager.import_file(
266
+ schema=agent_schema,
267
+ actor=actor,
268
+ append_copy_suffix=append_copy_suffix,
269
+ override_existing_tools=override_existing_tools,
270
+ env_vars=env_vars,
271
+ )
263
272
 
264
273
  if not import_result.success:
265
274
  raise HTTPException(
@@ -297,7 +306,9 @@ async def import_agent_serialized(
297
306
  False,
298
307
  description="If set to True, strips all messages from the agent before importing.",
299
308
  ),
300
- env_vars: Optional[Dict[str, Any]] = Form(None, description="Environment variables to pass to the agent for tool execution."),
309
+ env_vars_json: Optional[str] = Form(
310
+ None, description="Environment variables as a JSON string to pass to the agent for tool execution."
311
+ ),
301
312
  ):
302
313
  """
303
314
  Import a serialized agent file and recreate the agent(s) in the system.
@@ -311,6 +322,17 @@ async def import_agent_serialized(
311
322
  except json.JSONDecodeError:
312
323
  raise HTTPException(status_code=400, detail="Corrupted agent file format.")
313
324
 
325
+ # Parse env_vars_json if provided
326
+ env_vars = None
327
+ if env_vars_json:
328
+ try:
329
+ env_vars = json.loads(env_vars_json)
330
+ except json.JSONDecodeError:
331
+ raise HTTPException(status_code=400, detail="env_vars_json must be a valid JSON string")
332
+
333
+ if not isinstance(env_vars, dict):
334
+ raise HTTPException(status_code=400, detail="env_vars_json must be a valid JSON string")
335
+
314
336
  # Check if the JSON is AgentFileSchema or AgentSchema
315
337
  # TODO: This is kind of hacky, but should work as long as dont' change the schema
316
338
  if "agents" in agent_json and isinstance(agent_json.get("agents"), list):
@@ -323,6 +345,7 @@ async def import_agent_serialized(
323
345
  override_existing_tools=override_existing_tools,
324
346
  project_id=project_id,
325
347
  strip_messages=strip_messages,
348
+ env_vars=env_vars,
326
349
  )
327
350
  else:
328
351
  # This is a legacy AgentSchema
@@ -728,6 +751,49 @@ async def list_agent_folders(
728
751
  return await server.agent_manager.list_attached_sources_async(agent_id=agent_id, actor=actor)
729
752
 
730
753
 
754
+ @router.get("/{agent_id}/files", response_model=PaginatedAgentFiles, operation_id="list_agent_files")
755
+ async def list_agent_files(
756
+ agent_id: str,
757
+ cursor: Optional[str] = Query(None, description="Pagination cursor from previous response"),
758
+ limit: int = Query(20, ge=1, le=100, description="Number of items to return (1-100)"),
759
+ is_open: Optional[bool] = Query(None, description="Filter by open status (true for open files, false for closed files)"),
760
+ server: "SyncServer" = Depends(get_letta_server),
761
+ actor_id: str | None = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
762
+ ):
763
+ """
764
+ Get the files attached to an agent with their open/closed status (paginated).
765
+ """
766
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
767
+
768
+ # get paginated file-agent relationships for this agent
769
+ file_agents, next_cursor, has_more = await server.file_agent_manager.list_files_for_agent_paginated(
770
+ agent_id=agent_id, actor=actor, cursor=cursor, limit=limit, is_open=is_open
771
+ )
772
+
773
+ # enrich with file and source metadata
774
+ enriched_files = []
775
+ for fa in file_agents:
776
+ # get source/folder metadata
777
+ source = await server.source_manager.get_source_by_id(source_id=fa.source_id, actor=actor)
778
+
779
+ # build response object
780
+ attachment = AgentFileAttachment(
781
+ id=fa.id,
782
+ file_id=fa.file_id,
783
+ file_name=fa.file_name,
784
+ folder_id=fa.source_id,
785
+ folder_name=source.name if source else "Unknown",
786
+ is_open=fa.is_open,
787
+ last_accessed_at=fa.last_accessed_at,
788
+ visible_content=fa.visible_content,
789
+ start_line=fa.start_line,
790
+ end_line=fa.end_line,
791
+ )
792
+ enriched_files.append(attachment)
793
+
794
+ return PaginatedAgentFiles(files=enriched_files, next_cursor=next_cursor, has_more=has_more)
795
+
796
+
731
797
  # TODO: remove? can also get with agent blocks
732
798
  @router.get("/{agent_id}/core-memory", response_model=Memory, operation_id="retrieve_agent_memory")
733
799
  async def retrieve_agent_memory(
@@ -999,7 +1065,8 @@ async def send_message(
999
1065
  "bedrock",
1000
1066
  "ollama",
1001
1067
  "azure",
1002
- "together",
1068
+ "xai",
1069
+ "groq",
1003
1070
  ]
1004
1071
 
1005
1072
  # Create a new run for execution tracking
@@ -1143,7 +1210,8 @@ async def send_message_streaming(
1143
1210
  "bedrock",
1144
1211
  "ollama",
1145
1212
  "azure",
1146
- "together",
1213
+ "xai",
1214
+ "groq",
1147
1215
  ]
1148
1216
  model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock"]
1149
1217
 
@@ -1157,6 +1225,7 @@ async def send_message_streaming(
1157
1225
  metadata={
1158
1226
  "job_type": "send_message_streaming",
1159
1227
  "agent_id": agent_id,
1228
+ "background": request.background or False,
1160
1229
  },
1161
1230
  request_config=LettaRequestConfig(
1162
1231
  use_assistant_message=request.use_assistant_message,
@@ -1211,8 +1280,58 @@ async def send_message_streaming(
1211
1280
  else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1212
1281
  ),
1213
1282
  )
1283
+
1214
1284
  from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
1215
1285
 
1286
+ if request.background and settings.track_agent_run:
1287
+ if isinstance(redis_client, NoopAsyncRedisClient):
1288
+ raise HTTPException(
1289
+ status_code=503,
1290
+ detail=(
1291
+ "Background streaming requires Redis to be running. "
1292
+ "Please ensure Redis is properly configured. "
1293
+ f"LETTA_REDIS_HOST: {settings.redis_host}, LETTA_REDIS_PORT: {settings.redis_port}"
1294
+ ),
1295
+ )
1296
+
1297
+ if request.stream_tokens and model_compatible_token_streaming:
1298
+ raw_stream = agent_loop.step_stream(
1299
+ input_messages=request.messages,
1300
+ max_steps=request.max_steps,
1301
+ use_assistant_message=request.use_assistant_message,
1302
+ request_start_timestamp_ns=request_start_timestamp_ns,
1303
+ include_return_message_types=request.include_return_message_types,
1304
+ )
1305
+ else:
1306
+ raw_stream = agent_loop.step_stream_no_tokens(
1307
+ request.messages,
1308
+ max_steps=request.max_steps,
1309
+ use_assistant_message=request.use_assistant_message,
1310
+ request_start_timestamp_ns=request_start_timestamp_ns,
1311
+ include_return_message_types=request.include_return_message_types,
1312
+ )
1313
+
1314
+ asyncio.create_task(
1315
+ create_background_stream_processor(
1316
+ stream_generator=raw_stream,
1317
+ redis_client=redis_client,
1318
+ run_id=run.id,
1319
+ )
1320
+ )
1321
+
1322
+ stream = redis_sse_stream_generator(
1323
+ redis_client=redis_client,
1324
+ run_id=run.id,
1325
+ )
1326
+
1327
+ if request.include_pings and settings.enable_keepalive:
1328
+ stream = add_keepalive_to_stream(stream, keepalive_interval=settings.keepalive_interval)
1329
+
1330
+ return StreamingResponseWithStatusCode(
1331
+ stream,
1332
+ media_type="text/event-stream",
1333
+ )
1334
+
1216
1335
  if request.stream_tokens and model_compatible_token_streaming:
1217
1336
  raw_stream = agent_loop.step_stream(
1218
1337
  input_messages=request.messages,
@@ -1350,6 +1469,7 @@ async def _process_message_background(
1350
1469
  "google_vertex",
1351
1470
  "bedrock",
1352
1471
  "ollama",
1472
+ "groq",
1353
1473
  ]
1354
1474
  if agent_eligible and model_compatible:
1355
1475
  if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
@@ -1538,7 +1658,8 @@ async def preview_raw_payload(
1538
1658
  "bedrock",
1539
1659
  "ollama",
1540
1660
  "azure",
1541
- "together",
1661
+ "xai",
1662
+ "groq",
1542
1663
  ]
1543
1664
 
1544
1665
  if agent_eligible and model_compatible:
@@ -1608,7 +1729,8 @@ async def summarize_agent_conversation(
1608
1729
  "bedrock",
1609
1730
  "ollama",
1610
1731
  "azure",
1611
- "together",
1732
+ "xai",
1733
+ "groq",
1612
1734
  ]
1613
1735
 
1614
1736
  if agent_eligible and model_compatible:
letta/server/rest_api/routers/v1/folders.py
@@ -7,6 +7,7 @@ from typing import List, Optional
7
7
 
8
8
  from fastapi import APIRouter, Depends, Header, HTTPException, Query, UploadFile
9
9
  from starlette import status
10
+ from starlette.responses import Response
10
11
 
11
12
  import letta.constants as constants
12
13
  from letta.helpers.pinecone_utils import (
@@ -34,7 +35,7 @@ from letta.services.file_processor.file_types import get_allowed_media_types, ge
34
35
  from letta.services.file_processor.parser.markitdown_parser import MarkitdownFileParser
35
36
  from letta.services.file_processor.parser.mistral_parser import MistralFileParser
36
37
  from letta.settings import settings
37
- from letta.utils import safe_create_task, sanitize_filename
38
+ from letta.utils import safe_create_file_processing_task, safe_create_task, sanitize_filename
38
39
 
39
40
  logger = get_logger(__name__)
40
41
 
@@ -138,8 +139,11 @@ async def create_folder(
138
139
  # TODO: need to asyncify this
139
140
  if not folder_create.embedding_config:
140
141
  if not folder_create.embedding:
141
- # TODO: modify error type
142
- raise ValueError("Must specify either embedding or embedding_config in request")
142
+ if settings.default_embedding_handle is None:
143
+ # TODO: modify error type
144
+ raise ValueError("Must specify either embedding or embedding_config in request")
145
+ else:
146
+ folder_create.embedding = settings.default_embedding_handle
143
147
  folder_create.embedding_config = await server.get_embedding_config_from_handle_async(
144
148
  handle=folder_create.embedding,
145
149
  embedding_chunk_size=folder_create.embedding_chunk_size or constants.DEFAULT_EMBEDDING_CHUNK_SIZE,
@@ -257,13 +261,16 @@ async def upload_file_to_folder(
257
261
 
258
262
  # Store original filename and handle duplicate logic
259
263
  # Use custom name if provided, otherwise use the uploaded file's name
260
- original_filename = sanitize_filename(name if name else file.filename) # Basic sanitization only
264
+ # If custom name is provided, use it directly (it's just metadata, not a filesystem path)
265
+ # Otherwise, sanitize the uploaded filename for security
266
+ original_filename = name if name else sanitize_filename(file.filename) # Basic sanitization only
261
267
 
262
268
  # Check if duplicate exists
263
269
  existing_file = await server.file_manager.get_file_by_original_name_and_source(
264
270
  original_filename=original_filename, source_id=folder_id, actor=actor
265
271
  )
266
272
 
273
+ unique_filename = None
267
274
  if existing_file:
268
275
  # Duplicate found, handle based on strategy
269
276
  if duplicate_handling == DuplicateFileHandling.ERROR:
@@ -305,8 +312,11 @@ async def upload_file_to_folder(
305
312
 
306
313
  # Use cloud processing for all files (simple files always, complex files with Mistral key)
307
314
  logger.info("Running experimental cloud based file processing...")
308
- safe_create_task(
315
+ safe_create_file_processing_task(
309
316
  load_file_to_source_cloud(server, agent_states, content, folder_id, actor, folder.embedding_config, file_metadata),
317
+ file_metadata=file_metadata,
318
+ server=server,
319
+ actor=actor,
310
320
  logger=logger,
311
321
  label="file_processor.process",
312
322
  )
letta/server/rest_api/routers/v1/runs.py
@@ -1,16 +1,23 @@
1
+ from datetime import timedelta
1
2
  from typing import Annotated, List, Optional
2
3
 
3
- from fastapi import APIRouter, Depends, Header, HTTPException, Query
4
+ from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
4
5
  from pydantic import Field
5
6
 
7
+ from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
8
+ from letta.helpers.datetime_helpers import get_utc_time
6
9
  from letta.orm.errors import NoResultFound
7
10
  from letta.schemas.enums import JobStatus, JobType, MessageRole
8
11
  from letta.schemas.letta_message import LettaMessageUnion
12
+ from letta.schemas.letta_request import RetrieveStreamRequest
9
13
  from letta.schemas.openai.chat_completion_response import UsageStatistics
10
14
  from letta.schemas.run import Run
11
15
  from letta.schemas.step import Step
16
+ from letta.server.rest_api.redis_stream_manager import redis_sse_stream_generator
17
+ from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
12
18
  from letta.server.rest_api.utils import get_letta_server
13
19
  from letta.server.server import SyncServer
20
+ from letta.settings import settings
14
21
 
15
22
  router = APIRouter(prefix="/runs", tags=["runs"])
16
23
 
@@ -19,6 +26,14 @@ router = APIRouter(prefix="/runs", tags=["runs"])
19
26
  def list_runs(
20
27
  server: "SyncServer" = Depends(get_letta_server),
21
28
  agent_ids: Optional[List[str]] = Query(None, description="The unique identifier of the agent associated with the run."),
29
+ background: Optional[bool] = Query(None, description="If True, filters for runs that were created in background mode."),
30
+ after: Optional[str] = Query(None, description="Cursor for pagination"),
31
+ before: Optional[str] = Query(None, description="Cursor for pagination"),
32
+ limit: Optional[int] = Query(50, description="Maximum number of runs to return"),
33
+ ascending: bool = Query(
34
+ False,
35
+ description="Whether to sort agents oldest to newest (True) or newest to oldest (False, default)",
36
+ ),
22
37
  actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
23
38
  ):
24
39
  """
@@ -26,18 +41,29 @@ def list_runs(
26
41
  """
27
42
  actor = server.user_manager.get_user_or_default(user_id=actor_id)
28
43
 
29
- runs = [Run.from_job(job) for job in server.job_manager.list_jobs(actor=actor, job_type=JobType.RUN)]
30
-
31
- if not agent_ids:
32
- return runs
33
-
34
- return [run for run in runs if "agent_id" in run.metadata and run.metadata["agent_id"] in agent_ids]
44
+ runs = [
45
+ Run.from_job(job)
46
+ for job in server.job_manager.list_jobs(
47
+ actor=actor,
48
+ job_type=JobType.RUN,
49
+ limit=limit,
50
+ before=before,
51
+ after=after,
52
+ ascending=False,
53
+ )
54
+ ]
55
+ if agent_ids:
56
+ runs = [run for run in runs if "agent_id" in run.metadata and run.metadata["agent_id"] in agent_ids]
57
+ if background is not None:
58
+ runs = [run for run in runs if "background" in run.metadata and run.metadata["background"] == background]
59
+ return runs
35
60
 
36
61
 
37
62
  @router.get("/active", response_model=List[Run], operation_id="list_active_runs")
38
63
  def list_active_runs(
39
64
  server: "SyncServer" = Depends(get_letta_server),
40
65
  agent_ids: Optional[List[str]] = Query(None, description="The unique identifier of the agent associated with the run."),
66
+ background: Optional[bool] = Query(None, description="If True, filters for runs that were created in background mode."),
41
67
  actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
42
68
  ):
43
69
  """
@@ -46,13 +72,15 @@ def list_active_runs(
46
72
  actor = server.user_manager.get_user_or_default(user_id=actor_id)
47
73
 
48
74
  active_runs = server.job_manager.list_jobs(actor=actor, statuses=[JobStatus.created, JobStatus.running], job_type=JobType.RUN)
49
-
50
75
  active_runs = [Run.from_job(job) for job in active_runs]
51
76
 
52
- if not agent_ids:
53
- return active_runs
77
+ if agent_ids:
78
+ active_runs = [run for run in active_runs if "agent_id" in run.metadata and run.metadata["agent_id"] in agent_ids]
79
+
80
+ if background is not None:
81
+ active_runs = [run for run in active_runs if "background" in run.metadata and run.metadata["background"] == background]
54
82
 
55
- return [run for run in active_runs if "agent_id" in run.metadata and run.metadata["agent_id"] in agent_ids]
83
+ return active_runs
56
84
 
57
85
 
58
86
  @router.get("/{run_id}", response_model=Run, operation_id="retrieve_run")
@@ -213,3 +241,65 @@ async def delete_run(
213
241
  return Run.from_job(job)
214
242
  except NoResultFound:
215
243
  raise HTTPException(status_code=404, detail="Run not found")
244
+
245
+
246
+ @router.post(
247
+ "/{run_id}/stream",
248
+ response_model=None,
249
+ operation_id="retrieve_stream",
250
+ responses={
251
+ 200: {
252
+ "description": "Successful response",
253
+ "content": {
254
+ "text/event-stream": {"description": "Server-Sent Events stream"},
255
+ },
256
+ }
257
+ },
258
+ )
259
+ async def retrieve_stream(
260
+ run_id: str,
261
+ request: RetrieveStreamRequest = Body(None),
262
+ actor_id: Optional[str] = Header(None, alias="user_id"),
263
+ server: "SyncServer" = Depends(get_letta_server),
264
+ ):
265
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
266
+ try:
267
+ job = server.job_manager.get_job_by_id(job_id=run_id, actor=actor)
268
+ except NoResultFound:
269
+ raise HTTPException(status_code=404, detail="Run not found")
270
+
271
+ run = Run.from_job(job)
272
+
273
+ if "background" not in run.metadata or not run.metadata["background"]:
274
+ raise HTTPException(status_code=400, detail="Run was not created in background mode, so it cannot be retrieved.")
275
+
276
+ if run.created_at < get_utc_time() - timedelta(hours=3):
277
+ raise HTTPException(status_code=410, detail="Run was created more than 3 hours ago, and is now expired.")
278
+
279
+ redis_client = await get_redis_client()
280
+
281
+ if isinstance(redis_client, NoopAsyncRedisClient):
282
+ raise HTTPException(
283
+ status_code=503,
284
+ detail=(
285
+ "Background streaming requires Redis to be running. "
286
+ "Please ensure Redis is properly configured. "
287
+ f"LETTA_REDIS_HOST: {settings.redis_host}, LETTA_REDIS_PORT: {settings.redis_port}"
288
+ ),
289
+ )
290
+
291
+ stream = redis_sse_stream_generator(
292
+ redis_client=redis_client,
293
+ run_id=run_id,
294
+ starting_after=request.starting_after,
295
+ poll_interval=request.poll_interval,
296
+ batch_size=request.batch_size,
297
+ )
298
+
299
+ if request.include_pings and settings.enable_keepalive:
300
+ stream = add_keepalive_to_stream(stream, keepalive_interval=settings.keepalive_interval)
301
+
302
+ return StreamingResponseWithStatusCode(
303
+ stream,
304
+ media_type="text/event-stream",
305
+ )
letta/server/rest_api/routers/v1/sources.py
@@ -2,18 +2,17 @@ import asyncio
2
2
  import mimetypes
3
3
  import os
4
4
  import tempfile
5
- from datetime import datetime, timedelta, timezone
6
5
  from pathlib import Path
7
6
  from typing import List, Optional
8
7
 
9
8
  from fastapi import APIRouter, Depends, Header, HTTPException, Query, UploadFile
10
9
  from starlette import status
10
+ from starlette.responses import Response
11
11
 
12
12
  import letta.constants as constants
13
13
  from letta.helpers.pinecone_utils import (
14
14
  delete_file_records_from_pinecone_index,
15
15
  delete_source_records_from_pinecone_index,
16
- list_pinecone_index_for_files,
17
16
  should_use_pinecone,
18
17
  )
19
18
  from letta.log import get_logger
@@ -35,14 +34,13 @@ from letta.services.file_processor.file_types import get_allowed_media_types, ge
35
34
  from letta.services.file_processor.parser.markitdown_parser import MarkitdownFileParser
36
35
  from letta.services.file_processor.parser.mistral_parser import MistralFileParser
37
36
  from letta.settings import settings
38
- from letta.utils import safe_create_task, sanitize_filename
37
+ from letta.utils import safe_create_file_processing_task, safe_create_task, sanitize_filename
39
38
 
40
39
  logger = get_logger(__name__)
41
40
 
42
41
  # Register all supported file types with Python's mimetypes module
43
42
  register_mime_types()
44
43
 
45
-
46
44
  router = APIRouter(prefix="/sources", tags=["sources"])
47
45
 
48
46
 
@@ -139,8 +137,11 @@ async def create_source(
139
137
  # TODO: need to asyncify this
140
138
  if not source_create.embedding_config:
141
139
  if not source_create.embedding:
142
- # TODO: modify error type
143
- raise ValueError("Must specify either embedding or embedding_config in request")
140
+ if settings.default_embedding_handle is None:
141
+ # TODO: modify error type
142
+ raise ValueError("Must specify either embedding or embedding_config in request")
143
+ else:
144
+ source_create.embedding = settings.default_embedding_handle
144
145
  source_create.embedding_config = await server.get_embedding_config_from_handle_async(
145
146
  handle=source_create.embedding,
146
147
  embedding_chunk_size=source_create.embedding_chunk_size or constants.DEFAULT_EMBEDDING_CHUNK_SIZE,
@@ -258,7 +259,9 @@ async def upload_file_to_source(
258
259
 
259
260
  # Store original filename and handle duplicate logic
260
261
  # Use custom name if provided, otherwise use the uploaded file's name
261
- original_filename = sanitize_filename(name if name else file.filename) # Basic sanitization only
262
+ # If custom name is provided, use it directly (it's just metadata, not a filesystem path)
263
+ # Otherwise, sanitize the uploaded filename for security
264
+ original_filename = name if name else sanitize_filename(file.filename) # Basic sanitization only
262
265
 
263
266
  # Check if duplicate exists
264
267
  existing_file = await server.file_manager.get_file_by_original_name_and_source(
@@ -307,8 +310,11 @@ async def upload_file_to_source(
307
310
 
308
311
  # Use cloud processing for all files (simple files always, complex files with Mistral key)
309
312
  logger.info("Running experimental cloud based file processing...")
310
- safe_create_task(
313
+ safe_create_file_processing_task(
311
314
  load_file_to_source_cloud(server, agent_states, content, source_id, actor, source.embedding_config, file_metadata),
315
+ file_metadata=file_metadata,
316
+ server=server,
317
+ actor=actor,
312
318
  logger=logger,
313
319
  label="file_processor.process",
314
320
  )
@@ -358,6 +364,10 @@ async def list_source_files(
358
364
  limit: int = Query(1000, description="Number of files to return"),
359
365
  after: Optional[str] = Query(None, description="Pagination cursor to fetch the next set of results"),
360
366
  include_content: bool = Query(False, description="Whether to include full file content"),
367
+ check_status_updates: bool = Query(
368
+ True,
369
+ description="Whether to check and update file processing status (from the vector db service). If False, will not fetch and update the status, which may lead to performance gains.",
370
+ ),
361
371
  server: "SyncServer" = Depends(get_letta_server),
362
372
  actor_id: Optional[str] = Header(None, alias="user_id"),
363
373
  ):
@@ -372,6 +382,7 @@ async def list_source_files(
372
382
  actor=actor,
373
383
  include_content=include_content,
374
384
  strip_directory_prefix=True, # TODO: Reconsider this. This is purely for aesthetics.
385
+ check_status_updates=check_status_updates,
375
386
  )
376
387
 
377
388
 
@@ -400,51 +411,8 @@ async def get_file_metadata(
400
411
  if file_metadata.source_id != source_id:
401
412
  raise HTTPException(status_code=404, detail=f"File with id={file_id} not found in source {source_id}.")
402
413
 
403
- # Check for timeout if status is not terminal
404
- if not file_metadata.processing_status.is_terminal_state():
405
- if file_metadata.created_at:
406
- # Handle timezone differences between PostgreSQL (timezone-aware) and SQLite (timezone-naive)
407
- if settings.letta_pg_uri_no_default:
408
- # PostgreSQL: both datetimes are timezone-aware
409
- timeout_threshold = datetime.now(timezone.utc) - timedelta(minutes=settings.file_processing_timeout_minutes)
410
- file_created_at = file_metadata.created_at
411
- else:
412
- # SQLite: both datetimes should be timezone-naive
413
- timeout_threshold = datetime.utcnow() - timedelta(minutes=settings.file_processing_timeout_minutes)
414
- file_created_at = file_metadata.created_at
415
-
416
- if file_created_at < timeout_threshold:
417
- # Move file to error status with timeout message
418
- timeout_message = settings.file_processing_timeout_error_message.format(settings.file_processing_timeout_minutes)
419
- try:
420
- file_metadata = await server.file_manager.update_file_status(
421
- file_id=file_metadata.id, actor=actor, processing_status=FileProcessingStatus.ERROR, error_message=timeout_message
422
- )
423
- except ValueError as e:
424
- # state transition was blocked - log it but don't fail the request
425
- logger.warning(f"Could not update file to timeout error state: {str(e)}")
426
- # continue with existing file_metadata
427
-
428
- if should_use_pinecone() and file_metadata.processing_status == FileProcessingStatus.EMBEDDING:
429
- ids = await list_pinecone_index_for_files(file_id=file_id, actor=actor)
430
- logger.info(
431
- f"Embedded chunks {len(ids)}/{file_metadata.total_chunks} for {file_id} ({file_metadata.file_name}) in organization {actor.organization_id}"
432
- )
433
-
434
- if len(ids) != file_metadata.chunks_embedded or len(ids) == file_metadata.total_chunks:
435
- if len(ids) != file_metadata.total_chunks:
436
- file_status = file_metadata.processing_status
437
- else:
438
- file_status = FileProcessingStatus.COMPLETED
439
- try:
440
- file_metadata = await server.file_manager.update_file_status(
441
- file_id=file_metadata.id, actor=actor, chunks_embedded=len(ids), processing_status=file_status
442
- )
443
- except ValueError as e:
444
- # state transition was blocked - this is a race condition
445
- # log it but don't fail the request since we're just reading metadata
446
- logger.warning(f"Race condition detected in get_file_metadata: {str(e)}")
447
- # return the current file state without updating
414
+ # Check and update file status (timeout check and pinecone embedding sync)
415
+ file_metadata = await server.file_manager.check_and_update_file_status(file_metadata, actor)
448
416
 
449
417
  return file_metadata
450
418
 
letta/server/rest_api/routers/v1/telemetry.py
@@ -1,18 +1,28 @@
1
+ from typing import Optional
2
+
1
3
  from fastapi import APIRouter, Depends, Header
2
4
 
3
5
  from letta.schemas.provider_trace import ProviderTrace
4
6
  from letta.server.rest_api.utils import get_letta_server
5
7
  from letta.server.server import SyncServer
8
+ from letta.settings import settings
6
9
 
7
10
  router = APIRouter(prefix="/telemetry", tags=["telemetry"])
8
11
 
9
12
 
10
- @router.get("/{step_id}", response_model=ProviderTrace, operation_id="retrieve_provider_trace")
13
+ @router.get("/{step_id}", response_model=Optional[ProviderTrace], operation_id="retrieve_provider_trace")
11
14
  async def retrieve_provider_trace_by_step_id(
12
15
  step_id: str,
13
16
  server: SyncServer = Depends(get_letta_server),
14
17
  actor_id: str | None = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
15
18
  ):
16
- return await server.telemetry_manager.get_provider_trace_by_step_id_async(
17
- step_id=step_id, actor=await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
18
- )
19
+ provider_trace = None
20
+ if settings.track_provider_trace:
21
+ try:
22
+ provider_trace = await server.telemetry_manager.get_provider_trace_by_step_id_async(
23
+ step_id=step_id, actor=await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
24
+ )
25
+ except:
26
+ pass
27
+
28
+ return provider_trace
letta/server/rest_api/routers/v1/tools.py
@@ -547,7 +547,7 @@ async def add_mcp_server_to_config(
547
547
  server_name=request.server_name,
548
548
  server_type=request.type,
549
549
  server_url=request.server_url,
550
- token=request.resolve_token() if not request.custom_headers else None,
550
+ token=request.resolve_token(),
551
551
  custom_headers=request.custom_headers,
552
552
  )
553
553
  elif isinstance(request, StreamableHTTPServerConfig):
@@ -555,7 +555,7 @@ async def add_mcp_server_to_config(
555
555
  server_name=request.server_name,
556
556
  server_type=request.type,
557
557
  server_url=request.server_url,
558
- token=request.resolve_token() if not request.custom_headers else None,
558
+ token=request.resolve_token(),
559
559
  custom_headers=request.custom_headers,
560
560
  )
561
561