letta-nightly 0.8.8.dev20250703104323__py3-none-any.whl → 0.8.8.dev20250703174903__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/agent.py +1 -0
- letta/agents/base_agent.py +8 -2
- letta/agents/ephemeral_summary_agent.py +33 -33
- letta/agents/letta_agent.py +104 -53
- letta/agents/voice_agent.py +2 -1
- letta/constants.py +8 -4
- letta/functions/function_sets/files.py +22 -7
- letta/functions/function_sets/multi_agent.py +34 -0
- letta/functions/types.py +1 -1
- letta/groups/helpers.py +8 -5
- letta/groups/sleeptime_multi_agent_v2.py +20 -15
- letta/interface.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +15 -8
- letta/interfaces/openai_chat_completions_streaming_interface.py +9 -6
- letta/interfaces/openai_streaming_interface.py +17 -11
- letta/llm_api/openai_client.py +2 -1
- letta/orm/agent.py +1 -0
- letta/orm/file.py +8 -2
- letta/orm/files_agents.py +36 -11
- letta/orm/mcp_server.py +3 -0
- letta/orm/source.py +2 -1
- letta/orm/step.py +3 -0
- letta/prompts/system/memgpt_v2_chat.txt +5 -8
- letta/schemas/agent.py +58 -23
- letta/schemas/embedding_config.py +3 -2
- letta/schemas/enums.py +4 -0
- letta/schemas/file.py +1 -0
- letta/schemas/letta_stop_reason.py +18 -0
- letta/schemas/mcp.py +15 -10
- letta/schemas/memory.py +35 -5
- letta/schemas/providers.py +11 -0
- letta/schemas/step.py +1 -0
- letta/schemas/tool.py +2 -1
- letta/server/rest_api/routers/v1/agents.py +320 -184
- letta/server/rest_api/routers/v1/groups.py +6 -2
- letta/server/rest_api/routers/v1/identities.py +6 -2
- letta/server/rest_api/routers/v1/jobs.py +49 -1
- letta/server/rest_api/routers/v1/sources.py +28 -19
- letta/server/rest_api/routers/v1/steps.py +7 -2
- letta/server/rest_api/routers/v1/tools.py +40 -9
- letta/server/rest_api/streaming_response.py +88 -0
- letta/server/server.py +61 -55
- letta/services/agent_manager.py +28 -16
- letta/services/file_manager.py +58 -9
- letta/services/file_processor/chunker/llama_index_chunker.py +2 -0
- letta/services/file_processor/embedder/openai_embedder.py +54 -10
- letta/services/file_processor/file_processor.py +59 -0
- letta/services/file_processor/parser/mistral_parser.py +2 -0
- letta/services/files_agents_manager.py +120 -2
- letta/services/helpers/agent_manager_helper.py +21 -4
- letta/services/job_manager.py +57 -6
- letta/services/mcp/base_client.py +1 -0
- letta/services/mcp_manager.py +13 -1
- letta/services/step_manager.py +14 -5
- letta/services/summarizer/summarizer.py +6 -22
- letta/services/tool_executor/builtin_tool_executor.py +0 -1
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/multi_agent_tool_executor.py +23 -0
- letta/services/tool_manager.py +7 -7
- letta/settings.py +11 -2
- letta/templates/summary_request_text.j2 +19 -0
- letta/utils.py +95 -14
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/METADATA +2 -2
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/RECORD +68 -67
- /letta/{agents/prompts → prompts/system}/summary_system_prompt.txt +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/routers/v1/groups.py
CHANGED

@@ -73,7 +73,9 @@ def create_group(
     group: GroupCreate = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
-    x_project: Optional[str] = Header(
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="The project slug to associate with the group (cloud only)."
+    ), # Only handled by next js middleware
 ):
     """
     Create a new multi-agent group with the specified configuration.

@@ -91,7 +93,9 @@ async def modify_group(
     group: GroupUpdate = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
-    x_project: Optional[str] = Header(
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="The project slug to associate with the group (cloud only)."
+    ), # Only handled by next js middleware
 ):
     """
     Create a new multi-agent group with the specified configuration.
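The new optional header can be exercised from any HTTP client; a minimal sketch (assuming a local Letta server at http://localhost:8283 with the v1 API under /v1, and an illustrative GroupCreate payload whose field names are assumptions, not taken from this diff):

import requests  # third-party HTTP client

BASE_URL = "http://localhost:8283/v1"  # assumed local server address

resp = requests.post(
    f"{BASE_URL}/groups",
    json={"agent_ids": [], "description": "demo group"},  # illustrative GroupCreate body
    headers={
        "user_id": "user-123",           # actor id, as read by the route
        "X-Project": "my-project-slug",  # new header; only honored on cloud deployments
    },
)
resp.raise_for_status()
print(resp.json())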
letta/server/rest_api/routers/v1/identities.py
CHANGED

@@ -86,7 +86,9 @@ async def create_identity(
     identity: IdentityCreate = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
-    x_project: Optional[str] = Header(
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="The project slug to associate with the identity (cloud only)."
+    ), # Only handled by next js middleware
 ):
     try:
         actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)

@@ -110,7 +112,9 @@ async def upsert_identity(
     identity: IdentityUpsert = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
-    x_project: Optional[str] = Header(
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="The project slug to associate with the identity (cloud only)."
+    ), # Only handled by next js middleware
 ):
     try:
         actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
letta/server/rest_api/routers/v1/jobs.py
CHANGED

@@ -15,10 +15,15 @@ router = APIRouter(prefix="/jobs", tags=["jobs"])
 async def list_jobs(
     server: "SyncServer" = Depends(get_letta_server),
     source_id: Optional[str] = Query(None, description="Only list jobs associated with the source."),
+    before: Optional[str] = Query(None, description="Cursor for pagination"),
+    after: Optional[str] = Query(None, description="Cursor for pagination"),
+    limit: Optional[int] = Query(50, description="Limit for pagination"),
+    ascending: bool = Query(True, description="Whether to sort jobs oldest to newest (True, default) or newest to oldest (False)"),
     actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
 ):
     """
     List all jobs.
+    TODO (cliandy): implementation for pagination
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)

@@ -26,6 +31,10 @@ async def list_jobs(
     return await server.job_manager.list_jobs_async(
         actor=actor,
         source_id=source_id,
+        before=before,
+        after=after,
+        limit=limit,
+        ascending=ascending,
     )


@@ -34,12 +43,24 @@ async def list_active_jobs(
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
     source_id: Optional[str] = Query(None, description="Only list jobs associated with the source."),
+    before: Optional[str] = Query(None, description="Cursor for pagination"),
+    after: Optional[str] = Query(None, description="Cursor for pagination"),
+    limit: Optional[int] = Query(50, description="Limit for pagination"),
+    ascending: bool = Query(True, description="Whether to sort jobs oldest to newest (True, default) or newest to oldest (False)"),
 ):
     """
     List all active jobs.
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    return await server.job_manager.list_jobs_async(
+    return await server.job_manager.list_jobs_async(
+        actor=actor,
+        statuses=[JobStatus.created, JobStatus.running],
+        source_id=source_id,
+        before=before,
+        after=after,
+        limit=limit,
+        ascending=ascending,
+    )


 @router.get("/{job_id}", response_model=Job, operation_id="retrieve_job")

@@ -59,6 +80,33 @@ async def retrieve_job(
         raise HTTPException(status_code=404, detail="Job not found")


+@router.patch("/{job_id}/cancel", response_model=Job, operation_id="cancel_job")
+async def cancel_job(
+    job_id: str,
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+    server: "SyncServer" = Depends(get_letta_server),
+):
+    """
+    Cancel a job by its job_id.
+
+    This endpoint marks a job as cancelled, which will cause any associated
+    agent execution to terminate as soon as possible.
+    """
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+
+    try:
+        # First check if the job exists and is in a cancellable state
+        existing_job = await server.job_manager.get_job_by_id_async(job_id=job_id, actor=actor)
+
+        if existing_job.status.is_terminal:
+            return False
+
+        return await server.job_manager.safe_update_job_status_async(job_id=job_id, new_status=JobStatus.cancelled, actor=actor)
+
+    except NoResultFound:
+        raise HTTPException(status_code=404, detail="Job not found")
+
+
 @router.delete("/{job_id}", response_model=Job, operation_id="delete_job")
 async def delete_job(
     job_id: str,
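For reference, the new pagination parameters and the cancel endpoint can be driven from any HTTP client; a minimal sketch (assuming a local server at http://localhost:8283 with the v1 API under /v1, that the list endpoint returns a JSON array of jobs, and illustrative IDs):

import requests

BASE_URL = "http://localhost:8283/v1"
HEADERS = {"user_id": "user-123"}  # illustrative actor id

# Page through jobs newest-first with the new cursor parameters
page = requests.get(
    f"{BASE_URL}/jobs",
    params={"limit": 10, "ascending": False},
    headers=HEADERS,
).json()

if page:
    # Fetch the next page by passing the last job's id as the "after" cursor
    next_page = requests.get(
        f"{BASE_URL}/jobs",
        params={"limit": 10, "ascending": False, "after": page[-1]["id"]},
        headers=HEADERS,
    ).json()

# Ask the server to cancel a (non-terminal) job
resp = requests.patch(f"{BASE_URL}/jobs/job-456/cancel", headers=HEADERS)
print(resp.status_code, resp.json())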
letta/server/rest_api/routers/v1/sources.py
CHANGED

@@ -10,6 +10,7 @@ from starlette import status

 import letta.constants as constants
 from letta.log import get_logger
+from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import FileProcessingStatus

@@ -184,6 +185,20 @@ async def upload_file_to_source(
     """
     Upload a file to a data source.
     """
+    # NEW: Cloud based file processing
+    # Determine file's MIME type
+    file_mime_type = mimetypes.guess_type(file.filename)[0] or "application/octet-stream"
+
+    # Check if it's a simple text file
+    is_simple_file = is_simple_text_mime_type(file_mime_type)
+
+    # For complex files, require Mistral API key
+    if not is_simple_file and not settings.mistral_api_key:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Mistral API key is required to process this file type {file_mime_type}. Please configure your Mistral API key to upload complex file formats.",
+        )
+
     allowed_media_types = get_allowed_media_types()

     # Normalize incoming Content-Type header (strip charset or any parameters).

@@ -220,15 +235,19 @@ async def upload_file_to_source(

     content = await file.read()

-    #
-
+    # Store original filename and generate unique filename
+    original_filename = sanitize_filename(file.filename) # Basic sanitization only
+    unique_filename = await server.file_manager.generate_unique_filename(
+        original_filename=original_filename, source=source, organization_id=actor.organization_id
+    )

     # create file metadata
     file_metadata = FileMetadata(
         source_id=source_id,
-        file_name=
+        file_name=unique_filename,
+        original_file_name=original_filename,
         file_path=None,
-        file_type=mimetypes.guess_type(
+        file_type=mimetypes.guess_type(original_filename)[0] or file.content_type or "unknown",
         file_size=file.size if file.size is not None else None,
         processing_status=FileProcessingStatus.PARSING,
     )

@@ -237,20 +256,6 @@ async def upload_file_to_source(
     # TODO: Do we need to pull in the full agent_states? Can probably simplify here right?
     agent_states = await server.source_manager.list_attached_agents(source_id=source_id, actor=actor)

-    # NEW: Cloud based file processing
-    # Determine file's MIME type
-    file_mime_type = mimetypes.guess_type(file.filename)[0] or "application/octet-stream"
-
-    # Check if it's a simple text file
-    is_simple_file = is_simple_text_mime_type(file_mime_type)
-
-    # For complex files, require Mistral API key
-    if not is_simple_file and not settings.mistral_api_key:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=f"Mistral API key is required to process this file type {file_mime_type}. Please configure your Mistral API key to upload complex file formats.",
-        )
-
     # Use cloud processing for all files (simple files always, complex files with Mistral key)
     logger.info("Running experimental cloud based file processing...")
     safe_create_task(

@@ -304,6 +309,7 @@ async def list_source_files(
         after=after,
         actor=actor,
         include_content=include_content,
+        strip_directory_prefix=True, # TODO: Reconsider this. This is purely for aesthetics.
     )


@@ -326,7 +332,9 @@ async def get_file_metadata(
         raise HTTPException(status_code=404, detail=f"Source with id={source_id} not found.")

     # Get file metadata using the file manager
-    file_metadata = await server.file_manager.get_file_by_id(
+    file_metadata = await server.file_manager.get_file_by_id(
+        file_id=file_id, actor=actor, include_content=include_content, strip_directory_prefix=True
+    )

     if not file_metadata:
         raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")

@@ -382,6 +390,7 @@ async def sleeptime_document_ingest_async(server: SyncServer, source_id: str, ac
     await server.sleeptime_document_ingest_async(agent, source, actor, clear_history)


+@trace_method
 async def load_file_to_source_cloud(
     server: SyncServer,
     agent_states: List[AgentState],
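The is_simple_text_mime_type helper used in the upload gate above is not part of this diff; as a hedged sketch of what such a gate typically looks like (the accepted types below are assumptions, not letta's actual list):

# Hypothetical stand-in for letta's is_simple_text_mime_type; the real
# implementation may accept a different set of types.
SIMPLE_TEXT_PREFIXES = ("text/",)  # assumption
SIMPLE_TEXT_TYPES = {"application/json", "application/xml"}  # assumption

def is_simple_text_mime_type_sketch(mime_type: str) -> bool:
    # Simple text can be chunked directly, so no Mistral API key is needed;
    # anything else (PDF, DOCX, images) goes through the Mistral-backed parser.
    return mime_type.startswith(SIMPLE_TEXT_PREFIXES) or mime_type in SIMPLE_TEXT_TYPES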
letta/server/rest_api/routers/v1/steps.py
CHANGED

@@ -26,8 +26,12 @@ async def list_steps(
     feedback: Optional[Literal["positive", "negative"]] = Query(None, description="Filter by feedback"),
     has_feedback: Optional[bool] = Query(None, description="Filter by whether steps have feedback (true) or not (false)"),
     tags: Optional[list[str]] = Query(None, description="Filter by tags"),
+    project_id: Optional[str] = Query(None, description="Filter by the project ID that is associated with the step (cloud only)."),
     server: SyncServer = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),
+    x_project: Optional[str] = Header(
+        None, alias="X-Project", description="Filter by project slug to associate with the group (cloud only)."
+    ), # Only handled by next js middleware
 ):
     """
     List steps with optional pagination and date filters.

@@ -53,6 +57,7 @@ async def list_steps(
         feedback=feedback,
         has_feedback=has_feedback,
         tags=tags,
+        project_id=project_id,
     )


@@ -90,7 +95,7 @@ async def add_feedback(


 @router.patch("/{step_id}/transaction/{transaction_id}", response_model=Step, operation_id="update_step_transaction_id")
-def update_step_transaction_id(
+async def update_step_transaction_id(
     step_id: str,
     transaction_id: str,
     actor_id: Optional[str] = Header(None, alias="user_id"),

@@ -102,6 +107,6 @@ def update_step_transaction_id(
     actor = server.user_manager.get_user_or_default(user_id=actor_id)

     try:
-        return server.step_manager.update_step_transaction_id(actor=actor, step_id=step_id, transaction_id=transaction_id)
+        return await server.step_manager.update_step_transaction_id(actor=actor, step_id=step_id, transaction_id=transaction_id)
     except NoResultFound:
         raise HTTPException(status_code=404, detail="Step not found")
letta/server/rest_api/routers/v1/tools.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Union

 from composio.client import ComposioClientError, HTTPError, NoItemsFound
 from composio.client.collections import ActionModel, AppModel

@@ -10,8 +10,10 @@ from composio.exceptions import (
     EnumStringNotFound,
 )
 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
+from pydantic import BaseModel, Field

 from letta.errors import LettaToolCreateError
+from letta.functions.functions import derive_openai_json_schema
 from letta.functions.mcp_client.exceptions import MCPTimeoutError
 from letta.functions.mcp_client.types import MCPServerType, MCPTool, SSEServerConfig, StdioServerConfig, StreamableHTTPServerConfig
 from letta.helpers.composio_helpers import get_composio_api_key

@@ -521,11 +523,19 @@ async def add_mcp_server_to_config(
         )
     elif isinstance(request, SSEServerConfig):
         mapped_request = MCPServer(
-            server_name=request.server_name,
+            server_name=request.server_name,
+            server_type=request.type,
+            server_url=request.server_url,
+            token=request.resolve_token() if not request.custom_headers else None,
+            custom_headers=request.custom_headers,
         )
     elif isinstance(request, StreamableHTTPServerConfig):
         mapped_request = MCPServer(
-            server_name=request.server_name,
+            server_name=request.server_name,
+            server_type=request.type,
+            server_url=request.server_url,
+            token=request.resolve_token() if not request.custom_headers else None,
+            custom_headers=request.custom_headers,
         )

     await server.mcp_manager.create_mcp_server(mapped_request, actor=actor)

@@ -637,7 +647,6 @@ async def test_mcp_server(

         await client.connect_to_server()
         tools = await client.list_tools()
-        await client.cleanup()
         return tools
     except ConnectionError as e:
         raise HTTPException(

@@ -658,11 +667,6 @@ async def test_mcp_server(
             },
         )
     except Exception as e:
-        if client:
-            try:
-                await client.cleanup()
-            except:
-                pass
         raise HTTPException(
             status_code=500,
             detail={

@@ -671,3 +675,30 @@
                 "server_name": request.server_name,
             },
         )
+    finally:
+        if client:
+            try:
+                await client.cleanup()
+            except Exception as cleanup_error:
+                logger.warning(f"Error during MCP client cleanup: {cleanup_error}")
+
+
+class CodeInput(BaseModel):
+    code: str = Field(..., description="Python source code to parse for JSON schema")
+
+
+@router.post("/generate-schema", response_model=Dict[str, Any], operation_id="generate_json_schema")
+async def generate_json_schema(
+    request: CodeInput = Body(...),
+    server: SyncServer = Depends(get_letta_server),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+):
+    """
+    Generate a JSON schema from the given Python source code defining a function or class.
+    """
+    try:
+        schema = derive_openai_json_schema(source_code=request.code)
+        return schema
+
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Failed to generate schema: {str(e)}")
letta/server/rest_api/streaming_response.py
CHANGED

@@ -2,6 +2,7 @@
 # stremaing HTTP trailers, as we cannot set codes after the initial response.
 # Taken from: https://github.com/fastapi/fastapi/discussions/10138#discussioncomment-10377361

+import asyncio
 import json
 from collections.abc import AsyncIterator

@@ -9,10 +10,73 @@ from fastapi.responses import StreamingResponse
 from starlette.types import Send

 from letta.log import get_logger
+from letta.schemas.enums import JobStatus
+from letta.schemas.user import User
+from letta.services.job_manager import JobManager

 logger = get_logger(__name__)


+# TODO (cliandy) wrap this and handle types
+async def cancellation_aware_stream_wrapper(
+    stream_generator: AsyncIterator[str | bytes],
+    job_manager: JobManager,
+    job_id: str,
+    actor: User,
+    cancellation_check_interval: float = 0.5,
+) -> AsyncIterator[str | bytes]:
+    """
+    Wraps a stream generator to provide real-time job cancellation checking.
+
+    This wrapper periodically checks for job cancellation while streaming and
+    can interrupt the stream at any point, not just at step boundaries.
+
+    Args:
+        stream_generator: The original stream generator to wrap
+        job_manager: Job manager instance for checking job status
+        job_id: ID of the job to monitor for cancellation
+        actor: User/actor making the request
+        cancellation_check_interval: How often to check for cancellation (seconds)
+
+    Yields:
+        Stream chunks from the original generator until cancelled
+
+    Raises:
+        asyncio.CancelledError: If the job is cancelled during streaming
+    """
+    last_cancellation_check = asyncio.get_event_loop().time()
+
+    try:
+        async for chunk in stream_generator:
+            # Check for cancellation periodically (not on every chunk for performance)
+            current_time = asyncio.get_event_loop().time()
+            if current_time - last_cancellation_check >= cancellation_check_interval:
+                try:
+                    job = await job_manager.get_job_by_id_async(job_id=job_id, actor=actor)
+                    if job.status == JobStatus.cancelled:
+                        logger.info(f"Stream cancelled for job {job_id}, interrupting stream")
+                        # Send cancellation event to client
+                        cancellation_event = {"message_type": "stop_reason", "stop_reason": "cancelled"}
+                        yield f"data: {json.dumps(cancellation_event)}\n\n"
+                        # Raise CancelledError to interrupt the stream
+                        raise asyncio.CancelledError(f"Job {job_id} was cancelled")
+                except Exception as e:
+                    # Log warning but don't fail the stream if cancellation check fails
+                    logger.warning(f"Failed to check job cancellation for job {job_id}: {e}")
+
+                last_cancellation_check = current_time
+
+            yield chunk
+
+    except asyncio.CancelledError:
+        # Re-raise CancelledError to ensure proper cleanup
+        logger.info(f"Stream for job {job_id} was cancelled and cleaned up")
+        raise
+    except Exception as e:
+        logger.error(f"Error in cancellation-aware stream wrapper for job {job_id}: {e}")
+        raise
+
+
 class StreamingResponseWithStatusCode(StreamingResponse):
     """
     Variation of StreamingResponse that can dynamically decide the HTTP status code,

@@ -81,6 +145,30 @@ class StreamingResponseWithStatusCode(StreamingResponse):
                 }
             )

+        # This should be handled properly upstream?
+        except asyncio.CancelledError:
+            logger.info("Stream was cancelled by client or job cancellation")
+            # Handle cancellation gracefully
+            more_body = False
+            cancellation_resp = {"error": {"message": "Stream cancelled"}}
+            cancellation_event = f"event: cancelled\ndata: {json.dumps(cancellation_resp)}\n\n".encode(self.charset)
+            if not self.response_started:
+                await send(
+                    {
+                        "type": "http.response.start",
+                        "status": 200, # Use 200 for graceful cancellation
+                        "headers": self.raw_headers,
+                    }
+                )
+            await send(
+                {
+                    "type": "http.response.body",
+                    "body": cancellation_event,
+                    "more_body": more_body,
+                }
+            )
+            return
+
         except Exception:
             logger.exception("unhandled_streaming_error")
             more_body = False
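Wiring the new wrapper into a streaming endpoint looks roughly like this (a hedged sketch; the route shape and manager plumbing are illustrative, not code from this diff):

from letta.server.rest_api.streaming_response import (
    StreamingResponseWithStatusCode,
    cancellation_aware_stream_wrapper,
)

async def stream_agent_response(agent_stream, job_manager, job_id, actor):
    # Wrap the raw SSE generator so a cancelled job interrupts the stream
    # mid-flight instead of only at step boundaries.
    wrapped = cancellation_aware_stream_wrapper(
        stream_generator=agent_stream,
        job_manager=job_manager,
        job_id=job_id,
        actor=actor,
        cancellation_check_interval=0.5,  # seconds between job-status polls
    )
    return StreamingResponseWithStatusCode(wrapped, media_type="text/event-stream")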
letta/server/server.py
CHANGED
@@ -19,7 +19,6 @@ import letta.constants as constants
 import letta.server.utils as server_utils
 import letta.system as system
 from letta.agent import Agent, save_agent
-from letta.agents.letta_agent import LettaAgent
 from letta.config import LettaConfig
 from letta.constants import LETTA_TOOL_EXECUTION_DIR
 from letta.data_sources.connectors import DataConnector, load_data

@@ -101,7 +100,7 @@ from letta.services.provider_manager import ProviderManager
 from letta.services.sandbox_config_manager import SandboxConfigManager
 from letta.services.source_manager import SourceManager
 from letta.services.step_manager import StepManager
-from letta.services.telemetry_manager import
+from letta.services.telemetry_manager import TelemetryManager
 from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
 from letta.services.tool_manager import ToolManager
 from letta.services.user_manager import UserManager

@@ -1360,46 +1359,28 @@ class SyncServer(Server):
     async def sleeptime_document_ingest_async(
         self, main_agent: AgentState, source: Source, actor: User, clear_history: bool = False
     ) -> None:
-
-
-
-
-
-
-
-
-
-
-
-        )
-
-
-
-
-
-
-
-
-
-        """
-        Internal method to create or update a file <-> agent association
-
-        Returns:
-            List of file names that were closed due to LRU eviction
-        """
-        # TODO: Maybe have LineChunker object be on the server level?
-        content_lines = LineChunker().chunk_text(file_metadata=file_metadata_with_content)
-        visible_content = "\n".join(content_lines)
-
-        file_agent, closed_files = await self.file_agent_manager.attach_file(
-            agent_id=agent_id,
-            file_id=file_metadata_with_content.id,
-            file_name=file_metadata_with_content.file_name,
-            actor=actor,
-            visible_content=visible_content,
-        )
-        return closed_files
+        # TEMPORARILY DISABLE UNTIL V2
+        # sleeptime_agent_state = await self.create_document_sleeptime_agent_async(main_agent, source, actor, clear_history)
+        # sleeptime_agent = LettaAgent(
+        #     agent_id=sleeptime_agent_state.id,
+        #     message_manager=self.message_manager,
+        #     agent_manager=self.agent_manager,
+        #     block_manager=self.block_manager,
+        #     job_manager=self.job_manager,
+        #     passage_manager=self.passage_manager,
+        #     actor=actor,
+        #     step_manager=self.step_manager,
+        #     telemetry_manager=self.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
+        # )
+        # passages = await self.agent_manager.list_passages_async(actor=actor, source_id=source.id)
+        # for passage in passages:
+        #     await sleeptime_agent.step(
+        #         input_messages=[
+        #             MessageCreate(role="user", content=passage.text),
+        #         ]
+        #     )
+        # await self.agent_manager.delete_agent_async(agent_id=sleeptime_agent_state.id, actor=actor)
+        pass

     async def _remove_file_from_agent(self, agent_id: str, file_id: str, actor: User) -> None:
         """

@@ -1430,9 +1411,23 @@
         logger.info(f"Inserting document into context window for source: {source_id}")
         logger.info(f"Attached agents: {[a.id for a in agent_states]}")

-        #
+        # Generate visible content for the file
+        line_chunker = LineChunker()
+        content_lines = line_chunker.chunk_text(file_metadata=file_metadata_with_content)
+        visible_content = "\n".join(content_lines)
+        visible_content_map = {file_metadata_with_content.file_name: visible_content}
+
+        # Attach file to each agent using bulk method (one file per agent, but atomic per agent)
         all_closed_files = await asyncio.gather(
-            *(
+            *(
+                self.file_agent_manager.attach_files_bulk(
+                    agent_id=agent_state.id,
+                    files_metadata=[file_metadata_with_content],
+                    visible_content_map=visible_content_map,
+                    actor=actor,
+                )
+                for agent_state in agent_states
+            )
         )
         # Flatten and log if any files were closed
         closed_files = [file for closed_list in all_closed_files for file in closed_list]

@@ -1448,14 +1443,23 @@
         Insert the uploaded documents into the context window of an agent
         attached to the given source.
         """
-        logger.info(f"Inserting documents into context window for agent_state: {agent_state.id}")
-
-        #
-
-
+        logger.info(f"Inserting {len(file_metadata_with_content)} documents into context window for agent_state: {agent_state.id}")
+
+        # Generate visible content for each file
+        line_chunker = LineChunker()
+        visible_content_map = {}
+        for file_metadata in file_metadata_with_content:
+            content_lines = line_chunker.chunk_text(file_metadata=file_metadata)
+            visible_content_map[file_metadata.file_name] = "\n".join(content_lines)
+
+        # Use bulk attach to avoid race conditions and duplicate LRU eviction decisions
+        closed_files = await self.file_agent_manager.attach_files_bulk(
+            agent_id=agent_state.id,
+            files_metadata=file_metadata_with_content,
+            visible_content_map=visible_content_map,
+            actor=actor,
         )
-
-        closed_files = [file for closed_list in all_closed_files for file in closed_list]
+
         if closed_files:
             logger.info(f"LRU eviction closed {len(closed_files)} files during bulk insert: {closed_files}")

@@ -1634,12 +1638,14 @@

         async def get_provider_models(provider: Provider) -> list[LLMConfig]:
             try:
-
+                async with asyncio.timeout(constants.GET_PROVIDERS_TIMEOUT_SECONDS):
+                    return await provider.list_llm_models_async()
+            except asyncio.TimeoutError:
+                warnings.warn(f"Timeout while listing LLM models for provider {provider}")
+                return []
             except Exception as e:
-                import traceback
-
                 traceback.print_exc()
-                warnings.warn(f"
+                warnings.warn(f"Error while listing LLM models for provider {provider}: {e}")
                 return []

         # Execute all provider model listing tasks concurrently