letta-nightly 0.8.8.dev20250703104323__py3-none-any.whl → 0.8.8.dev20250703174903__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. letta/agent.py +1 -0
  2. letta/agents/base_agent.py +8 -2
  3. letta/agents/ephemeral_summary_agent.py +33 -33
  4. letta/agents/letta_agent.py +104 -53
  5. letta/agents/voice_agent.py +2 -1
  6. letta/constants.py +8 -4
  7. letta/functions/function_sets/files.py +22 -7
  8. letta/functions/function_sets/multi_agent.py +34 -0
  9. letta/functions/types.py +1 -1
  10. letta/groups/helpers.py +8 -5
  11. letta/groups/sleeptime_multi_agent_v2.py +20 -15
  12. letta/interface.py +1 -1
  13. letta/interfaces/anthropic_streaming_interface.py +15 -8
  14. letta/interfaces/openai_chat_completions_streaming_interface.py +9 -6
  15. letta/interfaces/openai_streaming_interface.py +17 -11
  16. letta/llm_api/openai_client.py +2 -1
  17. letta/orm/agent.py +1 -0
  18. letta/orm/file.py +8 -2
  19. letta/orm/files_agents.py +36 -11
  20. letta/orm/mcp_server.py +3 -0
  21. letta/orm/source.py +2 -1
  22. letta/orm/step.py +3 -0
  23. letta/prompts/system/memgpt_v2_chat.txt +5 -8
  24. letta/schemas/agent.py +58 -23
  25. letta/schemas/embedding_config.py +3 -2
  26. letta/schemas/enums.py +4 -0
  27. letta/schemas/file.py +1 -0
  28. letta/schemas/letta_stop_reason.py +18 -0
  29. letta/schemas/mcp.py +15 -10
  30. letta/schemas/memory.py +35 -5
  31. letta/schemas/providers.py +11 -0
  32. letta/schemas/step.py +1 -0
  33. letta/schemas/tool.py +2 -1
  34. letta/server/rest_api/routers/v1/agents.py +320 -184
  35. letta/server/rest_api/routers/v1/groups.py +6 -2
  36. letta/server/rest_api/routers/v1/identities.py +6 -2
  37. letta/server/rest_api/routers/v1/jobs.py +49 -1
  38. letta/server/rest_api/routers/v1/sources.py +28 -19
  39. letta/server/rest_api/routers/v1/steps.py +7 -2
  40. letta/server/rest_api/routers/v1/tools.py +40 -9
  41. letta/server/rest_api/streaming_response.py +88 -0
  42. letta/server/server.py +61 -55
  43. letta/services/agent_manager.py +28 -16
  44. letta/services/file_manager.py +58 -9
  45. letta/services/file_processor/chunker/llama_index_chunker.py +2 -0
  46. letta/services/file_processor/embedder/openai_embedder.py +54 -10
  47. letta/services/file_processor/file_processor.py +59 -0
  48. letta/services/file_processor/parser/mistral_parser.py +2 -0
  49. letta/services/files_agents_manager.py +120 -2
  50. letta/services/helpers/agent_manager_helper.py +21 -4
  51. letta/services/job_manager.py +57 -6
  52. letta/services/mcp/base_client.py +1 -0
  53. letta/services/mcp_manager.py +13 -1
  54. letta/services/step_manager.py +14 -5
  55. letta/services/summarizer/summarizer.py +6 -22
  56. letta/services/tool_executor/builtin_tool_executor.py +0 -1
  57. letta/services/tool_executor/files_tool_executor.py +2 -2
  58. letta/services/tool_executor/multi_agent_tool_executor.py +23 -0
  59. letta/services/tool_manager.py +7 -7
  60. letta/settings.py +11 -2
  61. letta/templates/summary_request_text.j2 +19 -0
  62. letta/utils.py +95 -14
  63. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/METADATA +2 -2
  64. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/RECORD +68 -67
  65. /letta/{agents/prompts → prompts/system}/summary_system_prompt.txt +0 -0
  66. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/LICENSE +0 -0
  67. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/WHEEL +0 -0
  68. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/routers/v1/groups.py CHANGED
@@ -73,7 +73,9 @@ def create_group(
     group: GroupCreate = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
-    x_project: Optional[str] = Header(None, alias="X-Project"),  # Only handled by next js middleware
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="The project slug to associate with the group (cloud only)."
+    ),  # Only handled by next js middleware
 ):
     """
     Create a new multi-agent group with the specified configuration.
@@ -91,7 +93,9 @@ async def modify_group(
     group: GroupUpdate = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
-    x_project: Optional[str] = Header(None, alias="X-Project"),  # Only handled by next js middleware
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="The project slug to associate with the group (cloud only)."
+    ),  # Only handled by next js middleware
 ):
     """
     Create a new multi-agent group with the specified configuration.
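These hunks only add an OpenAPI `description` to the existing `X-Project` header; wire behavior is unchanged. For orientation, a minimal client-side sketch of supplying that header (the base URL, `/v1` mount path, payload fields, and IDs here are assumptions for illustration, not taken from the package):

```python
import requests

# Hypothetical local server and a minimal GroupCreate-style payload; consult
# the GroupCreate schema in letta/schemas for the actual required fields.
resp = requests.post(
    "http://localhost:8283/v1/groups",
    json={"agent_ids": [], "description": "example group"},
    headers={
        "user_id": "user-123",      # resolved to the acting user by the route
        "X-Project": "my-project",  # project slug; only handled in cloud deployments
    },
)
print(resp.status_code, resp.json())
```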
letta/server/rest_api/routers/v1/identities.py CHANGED
@@ -86,7 +86,9 @@ async def create_identity(
     identity: IdentityCreate = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
-    x_project: Optional[str] = Header(None, alias="X-Project"),  # Only handled by next js middleware
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="The project slug to associate with the identity (cloud only)."
+    ),  # Only handled by next js middleware
 ):
     try:
         actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
@@ -110,7 +112,9 @@ async def upsert_identity(
     identity: IdentityUpsert = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
-    x_project: Optional[str] = Header(None, alias="X-Project"),  # Only handled by next js middleware
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="The project slug to associate with the identity (cloud only)."
+    ),  # Only handled by next js middleware
 ):
     try:
         actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
letta/server/rest_api/routers/v1/jobs.py CHANGED
@@ -15,10 +15,15 @@ router = APIRouter(prefix="/jobs", tags=["jobs"])
 async def list_jobs(
     server: "SyncServer" = Depends(get_letta_server),
     source_id: Optional[str] = Query(None, description="Only list jobs associated with the source."),
+    before: Optional[str] = Query(None, description="Cursor for pagination"),
+    after: Optional[str] = Query(None, description="Cursor for pagination"),
+    limit: Optional[int] = Query(50, description="Limit for pagination"),
+    ascending: bool = Query(True, description="Whether to sort jobs oldest to newest (True, default) or newest to oldest (False)"),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     """
     List all jobs.
+    TODO (cliandy): implementation for pagination
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
 
@@ -26,6 +31,10 @@ async def list_jobs(
     return await server.job_manager.list_jobs_async(
         actor=actor,
         source_id=source_id,
+        before=before,
+        after=after,
+        limit=limit,
+        ascending=ascending,
     )
 
 
@@ -34,12 +43,24 @@ async def list_active_jobs(
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
     source_id: Optional[str] = Query(None, description="Only list jobs associated with the source."),
+    before: Optional[str] = Query(None, description="Cursor for pagination"),
+    after: Optional[str] = Query(None, description="Cursor for pagination"),
+    limit: Optional[int] = Query(50, description="Limit for pagination"),
+    ascending: bool = Query(True, description="Whether to sort jobs oldest to newest (True, default) or newest to oldest (False)"),
 ):
     """
     List all active jobs.
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    return await server.job_manager.list_jobs_async(actor=actor, statuses=[JobStatus.created, JobStatus.running], source_id=source_id)
+    return await server.job_manager.list_jobs_async(
+        actor=actor,
+        statuses=[JobStatus.created, JobStatus.running],
+        source_id=source_id,
+        before=before,
+        after=after,
+        limit=limit,
+        ascending=ascending,
+    )
 
 
 @router.get("/{job_id}", response_model=Job, operation_id="retrieve_job")
@@ -59,6 +80,33 @@ async def retrieve_job(
         raise HTTPException(status_code=404, detail="Job not found")
 
 
+@router.patch("/{job_id}/cancel", response_model=Job, operation_id="cancel_job")
+async def cancel_job(
+    job_id: str,
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+    server: "SyncServer" = Depends(get_letta_server),
+):
+    """
+    Cancel a job by its job_id.
+
+    This endpoint marks a job as cancelled, which will cause any associated
+    agent execution to terminate as soon as possible.
+    """
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+
+    try:
+        # First check if the job exists and is in a cancellable state
+        existing_job = await server.job_manager.get_job_by_id_async(job_id=job_id, actor=actor)
+
+        if existing_job.status.is_terminal:
+            return False
+
+        return await server.job_manager.safe_update_job_status_async(job_id=job_id, new_status=JobStatus.cancelled, actor=actor)
+
+    except NoResultFound:
+        raise HTTPException(status_code=404, detail="Job not found")
+
+
 @router.delete("/{job_id}", response_model=Job, operation_id="delete_job")
 async def delete_job(
     job_id: str,
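The new `cancel_job` route can be exercised over plain HTTP. A hedged sketch (the server URL, `/v1` mount prefix, and IDs are assumptions):

```python
import requests

BASE_URL = "http://localhost:8283"  # assumption: default local Letta server
JOB_ID = "job-1234"                 # hypothetical job ID from a prior request

# PATCH /v1/jobs/{job_id}/cancel marks the job cancelled; per the code above,
# jobs already in a terminal state are left untouched.
resp = requests.patch(
    f"{BASE_URL}/v1/jobs/{JOB_ID}/cancel",
    headers={"user_id": "user-123"},  # actor is resolved from the user_id header
)
resp.raise_for_status()
print(resp.json())
```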
letta/server/rest_api/routers/v1/sources.py CHANGED
@@ -10,6 +10,7 @@ from starlette import status
 
 import letta.constants as constants
 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import FileProcessingStatus
@@ -184,6 +185,20 @@ async def upload_file_to_source(
     """
     Upload a file to a data source.
     """
+    # NEW: Cloud based file processing
+    # Determine file's MIME type
+    file_mime_type = mimetypes.guess_type(file.filename)[0] or "application/octet-stream"
+
+    # Check if it's a simple text file
+    is_simple_file = is_simple_text_mime_type(file_mime_type)
+
+    # For complex files, require Mistral API key
+    if not is_simple_file and not settings.mistral_api_key:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Mistral API key is required to process this file type {file_mime_type}. Please configure your Mistral API key to upload complex file formats.",
+        )
+
     allowed_media_types = get_allowed_media_types()
 
     # Normalize incoming Content-Type header (strip charset or any parameters).
@@ -220,15 +235,19 @@ async def upload_file_to_source(
 
     content = await file.read()
 
-    # sanitize filename
-    file.filename = sanitize_filename(file.filename)
+    # Store original filename and generate unique filename
+    original_filename = sanitize_filename(file.filename)  # Basic sanitization only
+    unique_filename = await server.file_manager.generate_unique_filename(
+        original_filename=original_filename, source=source, organization_id=actor.organization_id
+    )
 
     # create file metadata
     file_metadata = FileMetadata(
         source_id=source_id,
-        file_name=file.filename,
+        file_name=unique_filename,
+        original_file_name=original_filename,
         file_path=None,
-        file_type=mimetypes.guess_type(file.filename)[0] or file.content_type or "unknown",
+        file_type=mimetypes.guess_type(original_filename)[0] or file.content_type or "unknown",
         file_size=file.size if file.size is not None else None,
         processing_status=FileProcessingStatus.PARSING,
     )
@@ -237,20 +256,6 @@ async def upload_file_to_source(
     # TODO: Do we need to pull in the full agent_states? Can probably simplify here right?
     agent_states = await server.source_manager.list_attached_agents(source_id=source_id, actor=actor)
 
-    # NEW: Cloud based file processing
-    # Determine file's MIME type
-    file_mime_type = mimetypes.guess_type(file.filename)[0] or "application/octet-stream"
-
-    # Check if it's a simple text file
-    is_simple_file = is_simple_text_mime_type(file_mime_type)
-
-    # For complex files, require Mistral API key
-    if not is_simple_file and not settings.mistral_api_key:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=f"Mistral API key is required to process this file type {file_mime_type}. Please configure your Mistral API key to upload complex file formats.",
-        )
-
     # Use cloud processing for all files (simple files always, complex files with Mistral key)
     logger.info("Running experimental cloud based file processing...")
     safe_create_task(
@@ -304,6 +309,7 @@ async def list_source_files(
         after=after,
         actor=actor,
         include_content=include_content,
+        strip_directory_prefix=True,  # TODO: Reconsider this. This is purely for aesthetics.
     )
 
 
@@ -326,7 +332,9 @@ async def get_file_metadata(
         raise HTTPException(status_code=404, detail=f"Source with id={source_id} not found.")
 
     # Get file metadata using the file manager
-    file_metadata = await server.file_manager.get_file_by_id(file_id=file_id, actor=actor, include_content=include_content)
+    file_metadata = await server.file_manager.get_file_by_id(
+        file_id=file_id, actor=actor, include_content=include_content, strip_directory_prefix=True
+    )
 
     if not file_metadata:
         raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")
@@ -382,6 +390,7 @@ async def sleeptime_document_ingest_async(server: SyncServer, source_id: str, ac
     await server.sleeptime_document_ingest_async(agent, source, actor, clear_history)
 
 
+@trace_method
 async def load_file_to_source_cloud(
     server: SyncServer,
     agent_states: List[AgentState],
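The upload gate above hinges on a MIME-type check. A self-contained sketch of the same pattern; the `is_simple_text_mime_type` helper below is a stand-in written for illustration, not the implementation from `letta/utils.py`:

```python
import mimetypes

# Stand-in allowlist; the real helper in letta.utils may classify differently.
SIMPLE_TEXT_MIME_TYPES = {"application/json", "application/xml", "text/csv"}

def is_simple_text_mime_type(mime_type: str) -> bool:
    """Treat text/* plus a small set of structured-text types as 'simple'."""
    return mime_type.startswith("text/") or mime_type in SIMPLE_TEXT_MIME_TYPES

for name in ("notes.txt", "report.pdf", "data.json"):
    mime = mimetypes.guess_type(name)[0] or "application/octet-stream"
    # Per the route above: non-simple types require a configured Mistral API key.
    print(name, mime, "simple" if is_simple_text_mime_type(mime) else "needs Mistral OCR")
```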
letta/server/rest_api/routers/v1/steps.py CHANGED
@@ -26,8 +26,12 @@ async def list_steps(
     feedback: Optional[Literal["positive", "negative"]] = Query(None, description="Filter by feedback"),
     has_feedback: Optional[bool] = Query(None, description="Filter by whether steps have feedback (true) or not (false)"),
     tags: Optional[list[str]] = Query(None, description="Filter by tags"),
+    project_id: Optional[str] = Query(None, description="Filter by the project ID that is associated with the step (cloud only)."),
     server: SyncServer = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="Filter by project slug to associate with the group (cloud only)."
+    ),  # Only handled by next js middleware
 ):
     """
     List steps with optional pagination and date filters.
@@ -53,6 +57,7 @@ async def list_steps(
         feedback=feedback,
         has_feedback=has_feedback,
         tags=tags,
+        project_id=project_id,
     )
 
 
@@ -90,7 +95,7 @@ async def add_feedback(
 
 
 @router.patch("/{step_id}/transaction/{transaction_id}", response_model=Step, operation_id="update_step_transaction_id")
-def update_step_transaction_id(
+async def update_step_transaction_id(
     step_id: str,
     transaction_id: str,
     actor_id: Optional[str] = Header(None, alias="user_id"),
@@ -102,6 +107,6 @@ def update_step_transaction_id(
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
 
     try:
-        return server.step_manager.update_step_transaction_id(actor=actor, step_id=step_id, transaction_id=transaction_id)
+        return await server.step_manager.update_step_transaction_id(actor=actor, step_id=step_id, transaction_id=transaction_id)
     except NoResultFound:
         raise HTTPException(status_code=404, detail="Step not found")
letta/server/rest_api/routers/v1/tools.py CHANGED
@@ -1,4 +1,4 @@
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 from composio.client import ComposioClientError, HTTPError, NoItemsFound
 from composio.client.collections import ActionModel, AppModel
@@ -10,8 +10,10 @@ from composio.exceptions import (
     EnumStringNotFound,
 )
 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
+from pydantic import BaseModel, Field
 
 from letta.errors import LettaToolCreateError
+from letta.functions.functions import derive_openai_json_schema
 from letta.functions.mcp_client.exceptions import MCPTimeoutError
 from letta.functions.mcp_client.types import MCPServerType, MCPTool, SSEServerConfig, StdioServerConfig, StreamableHTTPServerConfig
 from letta.helpers.composio_helpers import get_composio_api_key
@@ -521,11 +523,19 @@ async def add_mcp_server_to_config(
         )
     elif isinstance(request, SSEServerConfig):
         mapped_request = MCPServer(
-            server_name=request.server_name, server_type=request.type, server_url=request.server_url, token=request.resolve_token()
+            server_name=request.server_name,
+            server_type=request.type,
+            server_url=request.server_url,
+            token=request.resolve_token() if not request.custom_headers else None,
+            custom_headers=request.custom_headers,
         )
     elif isinstance(request, StreamableHTTPServerConfig):
         mapped_request = MCPServer(
-            server_name=request.server_name, server_type=request.type, server_url=request.server_url, token=request.resolve_token()
+            server_name=request.server_name,
+            server_type=request.type,
+            server_url=request.server_url,
+            token=request.resolve_token() if not request.custom_headers else None,
+            custom_headers=request.custom_headers,
         )
 
     await server.mcp_manager.create_mcp_server(mapped_request, actor=actor)
@@ -637,7 +647,6 @@ async def test_mcp_server(
 
         await client.connect_to_server()
         tools = await client.list_tools()
-        await client.cleanup()
        return tools
     except ConnectionError as e:
         raise HTTPException(
@@ -658,11 +667,6 @@ async def test_mcp_server(
             },
         )
     except Exception as e:
-        if client:
-            try:
-                await client.cleanup()
-            except:
-                pass
         raise HTTPException(
             status_code=500,
             detail={
@@ -671,3 +675,30 @@ async def test_mcp_server(
                 "server_name": request.server_name,
             },
         )
+    finally:
+        if client:
+            try:
+                await client.cleanup()
+            except Exception as cleanup_error:
+                logger.warning(f"Error during MCP client cleanup: {cleanup_error}")
+
+
+class CodeInput(BaseModel):
+    code: str = Field(..., description="Python source code to parse for JSON schema")
+
+
+@router.post("/generate-schema", response_model=Dict[str, Any], operation_id="generate_json_schema")
+async def generate_json_schema(
+    request: CodeInput = Body(...),
+    server: SyncServer = Depends(get_letta_server),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+):
+    """
+    Generate a JSON schema from the given Python source code defining a function or class.
+    """
+    try:
+        schema = derive_openai_json_schema(source_code=request.code)
+        return schema
+
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Failed to generate schema: {str(e)}")
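A quick way to exercise the new `/generate-schema` route; the base URL, `/v1/tools` mount path, and header values are assumptions:

```python
import requests

SOURCE_CODE = '''
def get_weather(city: str) -> str:
    """Return a one-line weather summary for a city."""
    return f"Sunny in {city}"
'''

# POST the function source; the server runs derive_openai_json_schema over it
# and returns an OpenAI-style tool schema (name, description, parameters).
resp = requests.post(
    "http://localhost:8283/v1/tools/generate-schema",  # assumed mount path
    json={"code": SOURCE_CODE},
    headers={"user_id": "user-123"},  # hypothetical actor ID
)
resp.raise_for_status()
print(resp.json())
```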
letta/server/rest_api/streaming_response.py CHANGED
@@ -2,6 +2,7 @@
 # stremaing HTTP trailers, as we cannot set codes after the initial response.
 # Taken from: https://github.com/fastapi/fastapi/discussions/10138#discussioncomment-10377361
 
+import asyncio
 import json
 from collections.abc import AsyncIterator
 
@@ -9,10 +10,73 @@ from fastapi.responses import StreamingResponse
 from starlette.types import Send
 
 from letta.log import get_logger
+from letta.schemas.enums import JobStatus
+from letta.schemas.user import User
+from letta.services.job_manager import JobManager
 
 logger = get_logger(__name__)
 
 
+# TODO (cliandy) wrap this and handle types
+async def cancellation_aware_stream_wrapper(
+    stream_generator: AsyncIterator[str | bytes],
+    job_manager: JobManager,
+    job_id: str,
+    actor: User,
+    cancellation_check_interval: float = 0.5,
+) -> AsyncIterator[str | bytes]:
+    """
+    Wraps a stream generator to provide real-time job cancellation checking.
+
+    This wrapper periodically checks for job cancellation while streaming and
+    can interrupt the stream at any point, not just at step boundaries.
+
+    Args:
+        stream_generator: The original stream generator to wrap
+        job_manager: Job manager instance for checking job status
+        job_id: ID of the job to monitor for cancellation
+        actor: User/actor making the request
+        cancellation_check_interval: How often to check for cancellation (seconds)
+
+    Yields:
+        Stream chunks from the original generator until cancelled
+
+    Raises:
+        asyncio.CancelledError: If the job is cancelled during streaming
+    """
+    last_cancellation_check = asyncio.get_event_loop().time()
+
+    try:
+        async for chunk in stream_generator:
+            # Check for cancellation periodically (not on every chunk for performance)
+            current_time = asyncio.get_event_loop().time()
+            if current_time - last_cancellation_check >= cancellation_check_interval:
+                try:
+                    job = await job_manager.get_job_by_id_async(job_id=job_id, actor=actor)
+                    if job.status == JobStatus.cancelled:
+                        logger.info(f"Stream cancelled for job {job_id}, interrupting stream")
+                        # Send cancellation event to client
+                        cancellation_event = {"message_type": "stop_reason", "stop_reason": "cancelled"}
+                        yield f"data: {json.dumps(cancellation_event)}\n\n"
+                        # Raise CancelledError to interrupt the stream
+                        raise asyncio.CancelledError(f"Job {job_id} was cancelled")
+                except Exception as e:
+                    # Log warning but don't fail the stream if cancellation check fails
+                    logger.warning(f"Failed to check job cancellation for job {job_id}: {e}")
+
+                last_cancellation_check = current_time
+
+            yield chunk
+
+    except asyncio.CancelledError:
+        # Re-raise CancelledError to ensure proper cleanup
+        logger.info(f"Stream for job {job_id} was cancelled and cleaned up")
+        raise
+    except Exception as e:
+        logger.error(f"Error in cancellation-aware stream wrapper for job {job_id}: {e}")
+        raise
+
+
 class StreamingResponseWithStatusCode(StreamingResponse):
     """
     Variation of StreamingResponse that can dynamically decide the HTTP status code,
@@ -81,6 +145,30 @@ class StreamingResponseWithStatusCode(StreamingResponse):
                     }
                 )
 
+        # This should be handled properly upstream?
+        except asyncio.CancelledError:
+            logger.info("Stream was cancelled by client or job cancellation")
+            # Handle cancellation gracefully
+            more_body = False
+            cancellation_resp = {"error": {"message": "Stream cancelled"}}
+            cancellation_event = f"event: cancelled\ndata: {json.dumps(cancellation_resp)}\n\n".encode(self.charset)
+            if not self.response_started:
+                await send(
+                    {
+                        "type": "http.response.start",
+                        "status": 200,  # Use 200 for graceful cancellation
+                        "headers": self.raw_headers,
+                    }
+                )
+            await send(
+                {
+                    "type": "http.response.body",
+                    "body": cancellation_event,
+                    "more_body": more_body,
+                }
+            )
+            return
+
         except Exception:
             logger.exception("unhandled_streaming_error")
             more_body = False
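The wrapper's polling behavior is easiest to see against a toy stream. A minimal self-contained sketch of the same pattern, using a dict as a stand-in for the job manager (none of these names are Letta's):

```python
import asyncio

# Toy stand-ins (not Letta's JobManager/stream types) to illustrate the pattern.
JOB_STATUS = {"job-1": "running"}

async def slow_chunks():
    for i in range(6):
        await asyncio.sleep(0.2)
        yield f"chunk-{i}"

async def cancellation_aware(stream, job_id, interval=0.5):
    loop = asyncio.get_running_loop()
    last_check = loop.time()
    async for chunk in stream:
        # Poll job status at most once per `interval` seconds, as the diff does.
        if loop.time() - last_check >= interval:
            if JOB_STATUS[job_id] == "cancelled":
                raise asyncio.CancelledError(f"Job {job_id} was cancelled")
            last_check = loop.time()
        yield chunk

async def main():
    async def cancel_later():
        await asyncio.sleep(0.7)
        JOB_STATUS["job-1"] = "cancelled"  # simulates PATCH /v1/jobs/job-1/cancel

    task = asyncio.create_task(cancel_later())
    try:
        async for chunk in cancellation_aware(slow_chunks(), "job-1"):
            print(chunk)
    except asyncio.CancelledError as err:
        print("stream interrupted:", err)
    await task

asyncio.run(main())
```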
letta/server/server.py CHANGED
@@ -19,7 +19,6 @@ import letta.constants as constants
 import letta.server.utils as server_utils
 import letta.system as system
 from letta.agent import Agent, save_agent
-from letta.agents.letta_agent import LettaAgent
 from letta.config import LettaConfig
 from letta.constants import LETTA_TOOL_EXECUTION_DIR
 from letta.data_sources.connectors import DataConnector, load_data
@@ -101,7 +100,7 @@ from letta.services.provider_manager import ProviderManager
 from letta.services.sandbox_config_manager import SandboxConfigManager
 from letta.services.source_manager import SourceManager
 from letta.services.step_manager import StepManager
-from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
+from letta.services.telemetry_manager import TelemetryManager
 from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
 from letta.services.tool_manager import ToolManager
 from letta.services.user_manager import UserManager
@@ -1360,46 +1359,28 @@ class SyncServer(Server):
     async def sleeptime_document_ingest_async(
         self, main_agent: AgentState, source: Source, actor: User, clear_history: bool = False
     ) -> None:
-        sleeptime_agent_state = await self.create_document_sleeptime_agent_async(main_agent, source, actor, clear_history)
-        sleeptime_agent = LettaAgent(
-            agent_id=sleeptime_agent_state.id,
-            message_manager=self.message_manager,
-            agent_manager=self.agent_manager,
-            block_manager=self.block_manager,
-            job_manager=self.job_manager,
-            passage_manager=self.passage_manager,
-            actor=actor,
-            step_manager=self.step_manager,
-            telemetry_manager=self.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
-        )
-        passages = await self.agent_manager.list_passages_async(actor=actor, source_id=source.id)
-        for passage in passages:
-            await sleeptime_agent.step(
-                input_messages=[
-                    MessageCreate(role="user", content=passage.text),
-                ]
-            )
-        await self.agent_manager.delete_agent_async(agent_id=sleeptime_agent_state.id, actor=actor)
-
-    async def _upsert_file_to_agent(self, agent_id: str, file_metadata_with_content: FileMetadata, actor: User) -> List[str]:
-        """
-        Internal method to create or update a file <-> agent association
-
-        Returns:
-            List of file names that were closed due to LRU eviction
-        """
-        # TODO: Maybe have LineChunker object be on the server level?
-        content_lines = LineChunker().chunk_text(file_metadata=file_metadata_with_content)
-        visible_content = "\n".join(content_lines)
-
-        file_agent, closed_files = await self.file_agent_manager.attach_file(
-            agent_id=agent_id,
-            file_id=file_metadata_with_content.id,
-            file_name=file_metadata_with_content.file_name,
-            actor=actor,
-            visible_content=visible_content,
-        )
-        return closed_files
+        # TEMPORARILY DISABLE UNTIL V2
+        # sleeptime_agent_state = await self.create_document_sleeptime_agent_async(main_agent, source, actor, clear_history)
+        # sleeptime_agent = LettaAgent(
+        #     agent_id=sleeptime_agent_state.id,
+        #     message_manager=self.message_manager,
+        #     agent_manager=self.agent_manager,
+        #     block_manager=self.block_manager,
+        #     job_manager=self.job_manager,
+        #     passage_manager=self.passage_manager,
+        #     actor=actor,
+        #     step_manager=self.step_manager,
+        #     telemetry_manager=self.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
+        # )
+        # passages = await self.agent_manager.list_passages_async(actor=actor, source_id=source.id)
+        # for passage in passages:
+        #     await sleeptime_agent.step(
+        #         input_messages=[
+        #             MessageCreate(role="user", content=passage.text),
+        #         ]
+        #     )
+        # await self.agent_manager.delete_agent_async(agent_id=sleeptime_agent_state.id, actor=actor)
+        pass
 
     async def _remove_file_from_agent(self, agent_id: str, file_id: str, actor: User) -> None:
         """
@@ -1430,9 +1411,23 @@ class SyncServer(Server):
         logger.info(f"Inserting document into context window for source: {source_id}")
         logger.info(f"Attached agents: {[a.id for a in agent_states]}")
 
-        # Collect any files that were closed due to LRU eviction during bulk attach
+        # Generate visible content for the file
+        line_chunker = LineChunker()
+        content_lines = line_chunker.chunk_text(file_metadata=file_metadata_with_content)
+        visible_content = "\n".join(content_lines)
+        visible_content_map = {file_metadata_with_content.file_name: visible_content}
+
+        # Attach file to each agent using bulk method (one file per agent, but atomic per agent)
         all_closed_files = await asyncio.gather(
-            *(self._upsert_file_to_agent(agent_state.id, file_metadata_with_content, actor) for agent_state in agent_states)
+            *(
+                self.file_agent_manager.attach_files_bulk(
+                    agent_id=agent_state.id,
+                    files_metadata=[file_metadata_with_content],
+                    visible_content_map=visible_content_map,
+                    actor=actor,
+                )
+                for agent_state in agent_states
+            )
         )
         # Flatten and log if any files were closed
         closed_files = [file for closed_list in all_closed_files for file in closed_list]
@@ -1448,14 +1443,23 @@ class SyncServer(Server):
         Insert the uploaded documents into the context window of an agent
         attached to the given source.
         """
-        logger.info(f"Inserting documents into context window for agent_state: {agent_state.id}")
-
-        # Collect any files that were closed due to LRU eviction during bulk insert
-        all_closed_files = await asyncio.gather(
-            *(self._upsert_file_to_agent(agent_state.id, file_metadata, actor) for file_metadata in file_metadata_with_content)
+        logger.info(f"Inserting {len(file_metadata_with_content)} documents into context window for agent_state: {agent_state.id}")
+
+        # Generate visible content for each file
+        line_chunker = LineChunker()
+        visible_content_map = {}
+        for file_metadata in file_metadata_with_content:
+            content_lines = line_chunker.chunk_text(file_metadata=file_metadata)
+            visible_content_map[file_metadata.file_name] = "\n".join(content_lines)
+
+        # Use bulk attach to avoid race conditions and duplicate LRU eviction decisions
+        closed_files = await self.file_agent_manager.attach_files_bulk(
+            agent_id=agent_state.id,
+            files_metadata=file_metadata_with_content,
+            visible_content_map=visible_content_map,
+            actor=actor,
         )
-        # Flatten and log if any files were closed
-        closed_files = [file for closed_list in all_closed_files for file in closed_list]
+
 
         if closed_files:
             logger.info(f"LRU eviction closed {len(closed_files)} files during bulk insert: {closed_files}")
 
@@ -1634,12 +1638,14 @@ class SyncServer(Server):
 
         async def get_provider_models(provider: Provider) -> list[LLMConfig]:
             try:
-                return await provider.list_llm_models_async()
+                async with asyncio.timeout(constants.GET_PROVIDERS_TIMEOUT_SECONDS):
+                    return await provider.list_llm_models_async()
+            except asyncio.TimeoutError:
+                warnings.warn(f"Timeout while listing LLM models for provider {provider}")
+                return []
             except Exception as e:
-                import traceback
-
                 traceback.print_exc()
-                warnings.warn(f"An error occurred while listing LLM models for provider {provider}: {e}")
+                warnings.warn(f"Error while listing LLM models for provider {provider}: {e}")
                 return []
 
         # Execute all provider model listing tasks concurrently
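The timeout guard above uses `asyncio.timeout`, a context manager added in Python 3.11. A minimal standalone illustration of the same guard-and-fallback pattern (the constant value and coroutine here are stand-ins):

```python
import asyncio
import warnings

GET_PROVIDERS_TIMEOUT_SECONDS = 2.0  # stand-in for the letta.constants value

async def slow_list_models() -> list[str]:
    await asyncio.sleep(5)  # simulate a provider endpoint that hangs
    return ["model-a"]

async def get_models_with_timeout() -> list[str]:
    try:
        # asyncio.timeout raises TimeoutError once the block exceeds its budget
        async with asyncio.timeout(GET_PROVIDERS_TIMEOUT_SECONDS):
            return await slow_list_models()
    except asyncio.TimeoutError:
        warnings.warn("Timeout while listing LLM models")
        return []  # degrade gracefully instead of blocking the whole listing

print(asyncio.run(get_models_with_timeout()))  # [] after ~2 seconds
```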