letta-nightly 0.11.4.dev20250825104222__py3-none-any.whl → 0.11.5__py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- letta/__init__.py +1 -1
- letta/agent.py +9 -3
- letta/agents/base_agent.py +2 -2
- letta/agents/letta_agent.py +56 -45
- letta/agents/voice_agent.py +2 -2
- letta/data_sources/redis_client.py +146 -1
- letta/errors.py +4 -0
- letta/functions/function_sets/files.py +2 -2
- letta/functions/mcp_client/types.py +30 -6
- letta/functions/schema_generator.py +46 -1
- letta/functions/schema_validator.py +17 -2
- letta/functions/types.py +1 -1
- letta/helpers/tool_execution_helper.py +0 -2
- letta/llm_api/anthropic_client.py +27 -5
- letta/llm_api/deepseek_client.py +97 -0
- letta/llm_api/groq_client.py +79 -0
- letta/llm_api/helpers.py +0 -1
- letta/llm_api/llm_api_tools.py +2 -113
- letta/llm_api/llm_client.py +21 -0
- letta/llm_api/llm_client_base.py +11 -9
- letta/llm_api/openai_client.py +3 -0
- letta/llm_api/xai_client.py +85 -0
- letta/prompts/prompt_generator.py +190 -0
- letta/schemas/agent_file.py +17 -2
- letta/schemas/file.py +24 -1
- letta/schemas/job.py +2 -0
- letta/schemas/letta_message.py +2 -0
- letta/schemas/letta_request.py +22 -0
- letta/schemas/message.py +10 -1
- letta/schemas/providers/bedrock.py +1 -0
- letta/server/rest_api/redis_stream_manager.py +300 -0
- letta/server/rest_api/routers/v1/agents.py +129 -7
- letta/server/rest_api/routers/v1/folders.py +15 -5
- letta/server/rest_api/routers/v1/runs.py +101 -11
- letta/server/rest_api/routers/v1/sources.py +21 -53
- letta/server/rest_api/routers/v1/telemetry.py +14 -4
- letta/server/rest_api/routers/v1/tools.py +2 -2
- letta/server/rest_api/streaming_response.py +3 -24
- letta/server/server.py +0 -1
- letta/services/agent_manager.py +2 -2
- letta/services/agent_serialization_manager.py +129 -32
- letta/services/file_manager.py +111 -6
- letta/services/file_processor/file_processor.py +5 -2
- letta/services/files_agents_manager.py +60 -0
- letta/services/helpers/agent_manager_helper.py +4 -205
- letta/services/helpers/tool_parser_helper.py +6 -3
- letta/services/mcp/base_client.py +7 -1
- letta/services/mcp/sse_client.py +7 -2
- letta/services/mcp/stdio_client.py +5 -0
- letta/services/mcp/streamable_http_client.py +11 -2
- letta/services/mcp_manager.py +31 -30
- letta/services/source_manager.py +26 -1
- letta/services/summarizer/summarizer.py +21 -10
- letta/services/tool_executor/files_tool_executor.py +13 -9
- letta/services/tool_executor/mcp_tool_executor.py +3 -0
- letta/services/tool_executor/tool_execution_manager.py +13 -0
- letta/services/tool_manager.py +43 -20
- letta/settings.py +1 -0
- letta/utils.py +37 -0
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/RECORD +64 -63
- letta/functions/mcp_client/__init__.py +0 -0
- letta/functions/mcp_client/base_client.py +0 -156
- letta/functions/mcp_client/sse_client.py +0 -51
- letta/functions/mcp_client/stdio_client.py +0 -109
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/streaming_response.py
CHANGED
@@ -10,6 +10,7 @@ import anyio
 
 from fastapi.responses import StreamingResponse
 from starlette.types import Send
 
+from letta.errors import LettaUnexpectedStreamCancellationError
 from letta.log import get_logger
 from letta.schemas.enums import JobStatus
 from letta.schemas.letta_ping import LettaPing
@@ -288,33 +289,11 @@ class StreamingResponseWithStatusCode(StreamingResponse):
 
             # Handle client timeouts (should throw error to inform user)
             except asyncio.CancelledError as exc:
-                logger.warning("Stream was
+                logger.warning("Stream was terminated due to unexpected cancellation from server")
                 # Handle unexpected cancellation with error
                 more_body = False
-                error_resp = {"error": {"message": "Request was unexpectedly cancelled (likely due to client timeout or disconnection)"}}
-                error_event = f"event: error\ndata: {json.dumps(error_resp)}\n\n".encode(self.charset)
-                if not self.response_started:
-                    await send(
-                        {
-                            "type": "http.response.start",
-                            "status": 408,  # Request Timeout
-                            "headers": self.raw_headers,
-                        }
-                    )
-                    raise
-                if self._client_connected:
-                    try:
-                        await send(
-                            {
-                                "type": "http.response.body",
-                                "body": error_event,
-                                "more_body": more_body,
-                            }
-                        )
-                    except anyio.ClosedResourceError:
-                        self._client_connected = False
                 capture_sentry_exception(exc)
-
+                raise LettaUnexpectedStreamCancellationError("Stream was terminated due to unexpected cancellation from server")
 
             except Exception as exc:
                 logger.exception("Unhandled Streaming Error")
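The net effect of this hunk: a cancelled stream no longer hand-writes an SSE error event and a 408 response inside the stream body; it records the exception and raises a typed error that an upstream handler can translate. Below is a minimal, self-contained sketch of that pattern; `UnexpectedStreamCancellationError` and `drain_stream` are illustrative stand-ins, not Letta's actual classes.

```python
# Sketch: convert an asyncio cancellation inside a streaming helper into a
# typed application error, instead of emitting an ad-hoc SSE error event.
import asyncio
import logging

logger = logging.getLogger(__name__)


class UnexpectedStreamCancellationError(Exception):
    """Raised when a server-side stream is cancelled unexpectedly (illustrative)."""


async def drain_stream(chunks):
    """Forward chunks from an async generator, translating cancellation."""
    collected = []
    try:
        async for chunk in chunks:
            collected.append(chunk)
    except asyncio.CancelledError as exc:
        logger.warning("Stream was terminated due to unexpected cancellation from server")
        # Surface a typed error so an exception handler (not the stream body)
        # decides the HTTP status and error payload.
        raise UnexpectedStreamCancellationError("stream cancelled") from exc
    return collected


async def _demo():
    async def gen():
        yield b"data: hello\n\n"
        yield b"data: world\n\n"

    print(await drain_stream(gen()))


if __name__ == "__main__":
    asyncio.run(_demo())
```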
letta/server/server.py
CHANGED
@@ -2068,7 +2068,6 @@ class SyncServer(Server):
            raise ValueError(f"No client was created for MCP server: {mcp_server_name}")
 
        tools = await self.mcp_clients[mcp_server_name].list_tools()
-
        # Add health information to each tool
        for tool in tools:
            if tool.inputSchema:
letta/services/agent_manager.py
CHANGED
@@ -42,6 +42,7 @@ from letta.orm.sandbox_config import AgentEnvironmentVariable
 from letta.orm.sandbox_config import AgentEnvironmentVariable as AgentEnvironmentVariableModel
 from letta.orm.sqlalchemy_base import AccessType
 from letta.otel.tracing import trace_method
+from letta.prompts.prompt_generator import PromptGenerator
 from letta.schemas.agent import AgentState as PydanticAgentState
 from letta.schemas.agent import AgentType, CreateAgent, UpdateAgent, get_prompt_template_for_agent_type
 from letta.schemas.block import DEFAULT_BLOCKS
@@ -89,7 +90,6 @@ from letta.services.helpers.agent_manager_helper import (
     check_supports_structured_output,
     compile_system_message,
     derive_system_message,
-    get_system_message_from_compiled_memory,
     initialize_message_sequence,
     initialize_message_sequence_async,
     package_initial_message_sequence,
@@ -1783,7 +1783,7 @@ class AgentManager:
 
        # update memory (TODO: potentially update recall/archival stats separately)
 
-       new_system_message_str = get_system_message_from_compiled_memory(
+       new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
            system_prompt=agent_state.system,
            memory_with_sources=curr_memory_str,
            in_context_memory_last_edit=memory_edit_timestamp,
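For orientation, the call-site change above only swaps a module-level helper for one scoped on the new `PromptGenerator` class (shipped in `letta/prompts/prompt_generator.py`). The sketch below shows that shape: the keyword arguments mirror the diff, but the body and return format are assumptions, not Letta's actual prompt template.

```python
# Sketch of the relocation pattern: the helper becomes a staticmethod, so call
# sites only gain the PromptGenerator. prefix. Formatting below is illustrative.
from datetime import datetime


class PromptGenerator:
    @staticmethod
    def get_system_message_from_compiled_memory(
        system_prompt: str,
        memory_with_sources: str,
        in_context_memory_last_edit: datetime,
    ) -> str:
        # Combine the static system prompt with the freshly compiled memory block.
        return (
            f"{system_prompt}\n\n"
            f"### Memory (last edited {in_context_memory_last_edit.isoformat()})\n"
            f"{memory_with_sources}"
        )


if __name__ == "__main__":
    print(
        PromptGenerator.get_system_message_from_compiled_memory(
            system_prompt="You are a helpful agent.",
            memory_with_sources="<persona>...</persona>",
            in_context_memory_last_edit=datetime(2025, 8, 25),
        )
    )
```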
letta/services/agent_serialization_manager.py
CHANGED
@@ -1,8 +1,16 @@
+import asyncio
+import uuid
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional
 
 from letta.constants import MCP_TOOL_TAG_NAME_PREFIX
-from letta.errors import
+from letta.errors import (
+    AgentExportIdMappingError,
+    AgentExportProcessingError,
+    AgentFileExportError,
+    AgentFileImportError,
+    AgentNotFoundForExportError,
+)
 from letta.helpers.pinecone_utils import should_use_pinecone
 from letta.log import get_logger
 from letta.schemas.agent import AgentState, CreateAgent
@@ -420,6 +428,8 @@ class AgentSerializationManager:
        self,
        schema: AgentFileSchema,
        actor: User,
+       append_copy_suffix: bool = False,
+       override_existing_tools: bool = True,
        dry_run: bool = False,
        env_vars: Optional[Dict[str, Any]] = None,
    ) -> ImportResult:
@@ -481,7 +491,9 @@ class AgentSerializationManager:
            pydantic_tools.append(Tool(**tool_schema.model_dump(exclude={"id"})))
 
        # bulk upsert all tools at once
-       created_tools = await self.tool_manager.bulk_upsert_tools_async(
+       created_tools = await self.tool_manager.bulk_upsert_tools_async(
+           pydantic_tools, actor, override_existing_tools=override_existing_tools
+       )
 
        # map file ids to database ids
        # note: tools are matched by name during upsert, so we need to match by name here too
@@ -513,8 +525,20 @@ class AgentSerializationManager:
        if schema.sources:
            # convert source schemas to pydantic sources
            pydantic_sources = []
+
+           # First, do a fast batch check for existing source names to avoid conflicts
+           source_names_to_check = [s.name for s in schema.sources]
+           existing_source_names = await self.source_manager.get_existing_source_names(source_names_to_check, actor)
+
            for source_schema in schema.sources:
                source_data = source_schema.model_dump(exclude={"id", "embedding", "embedding_chunk_size"})
+
+               # Check if source name already exists, if so add unique suffix
+               original_name = source_data["name"]
+               if original_name in existing_source_names:
+                   unique_suffix = uuid.uuid4().hex[:8]
+                   source_data["name"] = f"{original_name}_{unique_suffix}"
+
                pydantic_sources.append(Source(**source_data))
 
            # bulk upsert all sources at once
@@ -523,13 +547,15 @@
            # map file ids to database ids
            # note: sources are matched by name during upsert, so we need to match by name here too
            created_sources_by_name = {source.name: source for source in created_sources}
-           for source_schema in schema.sources:
-
+           for i, source_schema in enumerate(schema.sources):
+               # Use the pydantic source name (which may have been modified for uniqueness)
+               source_name = pydantic_sources[i].name
+               created_source = created_sources_by_name.get(source_name)
                if created_source:
                    file_to_db_ids[source_schema.id] = created_source.id
                    imported_count += 1
                else:
-                   logger.warning(f"Source {
+                   logger.warning(f"Source {source_name} was not created during bulk upsert")
 
        # 4. Create files (depends on sources)
        for file_schema in schema.files:
@@ -548,38 +574,49 @@ class AgentSerializationManager:
                imported_count += 1
 
        # 5. Process files for chunking/embedding (depends on files and sources)
-
-
-
-
-
-
-
-
-
-
+       # Start background tasks for file processing
+       background_tasks = []
+       if schema.files and any(f.content for f in schema.files):
+           if should_use_pinecone():
+               embedder = PineconeEmbedder(embedding_config=schema.agents[0].embedding_config)
+           else:
+               embedder = OpenAIEmbedder(embedding_config=schema.agents[0].embedding_config)
+           file_processor = FileProcessor(
+               file_parser=self.file_parser,
+               embedder=embedder,
+               actor=actor,
+               using_pinecone=self.using_pinecone,
+           )
 
-
-
-
-
+           for file_schema in schema.files:
+               if file_schema.content:  # Only process files with content
+                   file_db_id = file_to_db_ids[file_schema.id]
+                   source_db_id = file_to_db_ids[file_schema.source_id]
 
-
-
-
-
+                   # Get the created file metadata (with caching)
+                   if file_db_id not in file_metadata_cache:
+                       file_metadata_cache[file_db_id] = await self.file_manager.get_file_by_id(file_db_id, actor)
+                   file_metadata = file_metadata_cache[file_db_id]
 
-
-
+                   # Save the db call of fetching content again
+                   file_metadata.content = file_schema.content
 
-
-
-
+                   # Create background task for file processing
+                   # TODO: This can be moved to celery or RQ or something
+                   task = asyncio.create_task(
+                       self._process_file_async(
+                           file_metadata=file_metadata, source_id=source_db_id, file_processor=file_processor, actor=actor
+                       )
+                   )
+                   background_tasks.append(task)
+                   logger.info(f"Started background processing for file {file_metadata.file_name} (ID: {file_db_id})")
 
        # 6. Create agents with empty message history
        for agent_schema in schema.agents:
            # Convert AgentSchema back to CreateAgent, remapping tool/block IDs
            agent_data = agent_schema.model_dump(exclude={"id", "in_context_message_ids", "messages"})
+           if append_copy_suffix:
+               agent_data["name"] = agent_data.get("name") + "_copy"
 
            # Remap tool_ids from file IDs to database IDs
            if agent_data.get("tool_ids"):
@@ -589,6 +626,10 @@ class AgentSerializationManager:
            if agent_data.get("block_ids"):
                agent_data["block_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["block_ids"]]
 
+           # Remap source_ids from file IDs to database IDs
+           if agent_data.get("source_ids"):
+               agent_data["source_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["source_ids"]]
+
            if env_vars:
                for var in agent_data["tool_exec_environment_variables"]:
                    var["value"] = env_vars.get(var["key"], "")
@@ -635,14 +676,16 @@ class AgentSerializationManager:
            for file_agent_schema in agent_schema.files_agents:
                file_db_id = file_to_db_ids[file_agent_schema.file_id]
 
-               # Use cached file metadata if available
+               # Use cached file metadata if available (with content)
                if file_db_id not in file_metadata_cache:
-                   file_metadata_cache[file_db_id] = await self.file_manager.get_file_by_id(
+                   file_metadata_cache[file_db_id] = await self.file_manager.get_file_by_id(
+                       file_db_id, actor, include_content=True
+                   )
                file_metadata = file_metadata_cache[file_db_id]
                files_for_agent.append(file_metadata)
 
                if file_agent_schema.visible_content:
-                   visible_content_map[
+                   visible_content_map[file_metadata.file_name] = file_agent_schema.visible_content
 
            # Bulk attach files to agent
            await self.file_agent_manager.attach_files_bulk(
@@ -669,9 +712,19 @@
                file_to_db_ids[group.id] = created_group.id
                imported_count += 1
 
+       # prepare result message
+       num_background_tasks = len(background_tasks)
+       if num_background_tasks > 0:
+           message = (
+               f"Import completed successfully. Imported {imported_count} entities. "
+               f"{num_background_tasks} file(s) are being processed in the background for embeddings."
+           )
+       else:
+           message = f"Import completed successfully. Imported {imported_count} entities."
+
        return ImportResult(
            success=True,
-           message=
+           message=message,
            imported_count=imported_count,
            imported_agent_ids=imported_agent_ids,
            id_mappings=file_to_db_ids,
@@ -849,3 +902,47 @@
        except AttributeError:
            allowed = model_cls.__fields__.keys()  # Pydantic v1
        return {k: v for k, v in data.items() if k in allowed}
+
+    async def _process_file_async(self, file_metadata: FileMetadata, source_id: str, file_processor: FileProcessor, actor: User):
+        """
+        Process a file asynchronously in the background.
+
+        This method handles chunking and embedding of file content without blocking
+        the main import process.
+
+        Args:
+            file_metadata: The file metadata with content
+            source_id: The database ID of the source
+            file_processor: The file processor instance to use
+            actor: The user performing the action
+        """
+        file_id = file_metadata.id
+        file_name = file_metadata.file_name
+
+        try:
+            logger.info(f"Starting background processing for file {file_name} (ID: {file_id})")
+
+            # process the file for chunking/embedding
+            passages = await file_processor.process_imported_file(file_metadata=file_metadata, source_id=source_id)
+
+            logger.info(f"Successfully processed file {file_name} with {len(passages)} passages")
+
+            # file status is automatically updated to COMPLETED by process_imported_file
+            return passages
+
+        except Exception as e:
+            logger.error(f"Failed to process file {file_name} (ID: {file_id}) in background: {e}")
+
+            # update file status to ERROR
+            try:
+                await self.file_manager.update_file_status(
+                    file_id=file_id,
+                    actor=actor,
+                    processing_status=FileProcessingStatus.ERROR,
+                    error_message=str(e) if str(e) else f"Agent serialization failed: {type(e).__name__}",
+                )
+            except Exception as update_error:
+                logger.error(f"Failed to update file status to ERROR for {file_id}: {update_error}")
+
+            # we don't re-raise here since this is a background task
+            # the file will be marked as ERROR and the import can continue
letta/services/file_manager.py
CHANGED
@@ -1,6 +1,6 @@
 import asyncio
 import os
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 from typing import List, Optional
 
 from sqlalchemy import func, select, update
@@ -9,6 +9,8 @@ from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import selectinload
 
 from letta.constants import MAX_FILENAME_LENGTH
+from letta.helpers.pinecone_utils import list_pinecone_index_for_files, should_use_pinecone
+from letta.log import get_logger
 from letta.orm.errors import NoResultFound
 from letta.orm.file import FileContent as FileContentModel
 from letta.orm.file import FileMetadata as FileMetadataModel
@@ -20,8 +22,11 @@ from letta.schemas.source import Source as PydanticSource
 from letta.schemas.source_metadata import FileStats, OrganizationSourcesStats, SourceStats
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
+from letta.settings import settings
 from letta.utils import enforce_types
 
+logger = get_logger(__name__)
+
 
 class DuplicateFileError(Exception):
    """Raised when a duplicate file is encountered and error handling is specified"""
@@ -174,6 +179,10 @@ class FileManager:
        if processing_status is None and error_message is None and total_chunks is None and chunks_embedded is None:
            raise ValueError("Nothing to update")
 
+       # validate that ERROR status must have an error message
+       if processing_status == FileProcessingStatus.ERROR and not error_message:
+           raise ValueError("Error message is required when setting processing status to ERROR")
+
        values: dict[str, object] = {"updated_at": datetime.utcnow()}
        if processing_status is not None:
            values["processing_status"] = processing_status
@@ -273,6 +282,79 @@ class FileManager:
        )
        return await file_orm.to_pydantic_async()
 
+    @enforce_types
+    @trace_method
+    async def check_and_update_file_status(
+        self,
+        file_metadata: PydanticFileMetadata,
+        actor: PydanticUser,
+    ) -> PydanticFileMetadata:
+        """
+        Check and update file status for timeout and embedding completion.
+
+        This method consolidates logic for:
+        1. Checking if a file has timed out during processing
+        2. Checking Pinecone embedding status and updating counts
+
+        Args:
+            file_metadata: The file metadata to check
+            actor: User performing the check
+
+        Returns:
+            Updated file metadata with current status
+        """
+        # check for timeout if status is not terminal
+        if not file_metadata.processing_status.is_terminal_state():
+            if file_metadata.created_at:
+                # handle timezone differences between PostgreSQL (timezone-aware) and SQLite (timezone-naive)
+                if settings.letta_pg_uri_no_default:
+                    # postgresql: both datetimes are timezone-aware
+                    timeout_threshold = datetime.now(timezone.utc) - timedelta(minutes=settings.file_processing_timeout_minutes)
+                    file_created_at = file_metadata.created_at
+                else:
+                    # sqlite: both datetimes should be timezone-naive
+                    timeout_threshold = datetime.utcnow() - timedelta(minutes=settings.file_processing_timeout_minutes)
+                    file_created_at = file_metadata.created_at
+
+                if file_created_at < timeout_threshold:
+                    # move file to error status with timeout message
+                    timeout_message = settings.file_processing_timeout_error_message.format(settings.file_processing_timeout_minutes)
+                    try:
+                        file_metadata = await self.update_file_status(
+                            file_id=file_metadata.id,
+                            actor=actor,
+                            processing_status=FileProcessingStatus.ERROR,
+                            error_message=timeout_message,
+                        )
+                    except ValueError as e:
+                        # state transition was blocked - log it but don't fail
+                        logger.warning(f"Could not update file to timeout error state: {str(e)}")
+                        # continue with existing file_metadata
+
+        # check pinecone embedding status
+        if should_use_pinecone() and file_metadata.processing_status == FileProcessingStatus.EMBEDDING:
+            ids = await list_pinecone_index_for_files(file_id=file_metadata.id, actor=actor)
+            logger.info(
+                f"Embedded chunks {len(ids)}/{file_metadata.total_chunks} for {file_metadata.id} ({file_metadata.file_name}) in organization {actor.organization_id}"
+            )
+
+            if len(ids) != file_metadata.chunks_embedded or len(ids) == file_metadata.total_chunks:
+                if len(ids) != file_metadata.total_chunks:
+                    file_status = file_metadata.processing_status
+                else:
+                    file_status = FileProcessingStatus.COMPLETED
+                try:
+                    file_metadata = await self.update_file_status(
+                        file_id=file_metadata.id, actor=actor, chunks_embedded=len(ids), processing_status=file_status
+                    )
+                except ValueError as e:
+                    # state transition was blocked - this is a race condition
+                    # log it but don't fail since we're just checking status
+                    logger.warning(f"Race condition detected in check_and_update_file_status: {str(e)}")
+                    # return the current file state without updating
+
+        return file_metadata
+
     @enforce_types
     @trace_method
     async def upsert_file_content(
@@ -328,8 +410,22 @@
        limit: Optional[int] = 50,
        include_content: bool = False,
        strip_directory_prefix: bool = False,
+       check_status_updates: bool = False,
    ) -> List[PydanticFileMetadata]:
-       """List all files with optional pagination.
+       """List all files with optional pagination and status checking.
+
+       Args:
+           source_id: Source to list files from
+           actor: User performing the request
+           after: Pagination cursor
+           limit: Maximum number of files to return
+           include_content: Whether to include file content
+           strip_directory_prefix: Whether to strip directory prefix from filenames
+           check_status_updates: Whether to check and update status for timeout and embedding completion
+
+       Returns:
+           List of file metadata
+       """
        async with db_registry.async_session() as session:
            options = [selectinload(FileMetadataModel.content)] if include_content else None
 
@@ -341,10 +437,19 @@
                source_id=source_id,
                query_options=options,
            )
-
-
-
-
+
+           # convert all files to pydantic models
+           file_metadatas = await asyncio.gather(
+               *[file.to_pydantic_async(include_content=include_content, strip_directory_prefix=strip_directory_prefix) for file in files]
+           )
+
+           # if status checking is enabled, check all files concurrently
+           if check_status_updates:
+               file_metadatas = await asyncio.gather(
+                   *[self.check_and_update_file_status(file_metadata, actor) for file_metadata in file_metadatas]
+               )
+
+           return file_metadatas
 
    @enforce_types
    @trace_method
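The timeout branch in `check_and_update_file_status` picks an aware or naive "now" so the comparison with `created_at` never mixes timezone-aware and naive datetimes; the diff keys this off the configured database, while the sketch below keys it off the timestamp itself, which gives the same comparison safety. `is_processing_timed_out` is an illustrative helper, not part of the package.

```python
# Sketch of the timeout check: compare created_at against a threshold computed
# from a matching (aware or naive) notion of "now".
from datetime import datetime, timedelta, timezone


def is_processing_timed_out(created_at: datetime, timeout_minutes: int = 30) -> bool:
    if created_at.tzinfo is not None:
        now = datetime.now(timezone.utc)   # aware timestamp (e.g. PostgreSQL)
    else:
        now = datetime.utcnow()            # naive timestamp (e.g. SQLite)
    return created_at < now - timedelta(minutes=timeout_minutes)


if __name__ == "__main__":
    fresh = datetime.now(timezone.utc)
    stale = datetime.now(timezone.utc) - timedelta(hours=2)
    print(is_processing_timed_out(fresh))                                   # False
    print(is_processing_timed_out(stale))                                   # True
    print(is_processing_timed_out(datetime.utcnow() - timedelta(hours=2)))  # True (naive path)
```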
letta/services/file_processor/file_processor.py
CHANGED
@@ -264,7 +264,10 @@ class FileProcessor:
                },
            )
            await self.file_manager.update_file_status(
-               file_id=file_metadata.id,
+               file_id=file_metadata.id,
+               actor=self.actor,
+               processing_status=FileProcessingStatus.ERROR,
+               error_message=str(e) if str(e) else f"File processing failed: {type(e).__name__}",
            )
 
            return []
@@ -361,7 +364,7 @@
                file_id=file_metadata.id,
                actor=self.actor,
                processing_status=FileProcessingStatus.ERROR,
-               error_message=str(e),
+               error_message=str(e) if str(e) else f"Import file processing failed: {type(e).__name__}",
            )
 
            return []
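Both hunks guard against exceptions that stringify to an empty message, which would otherwise trip the new "ERROR status requires an error message" check in `FileManager.update_file_status`. A tiny sketch of that fallback, with an illustrative helper name:

```python
# Sketch: always produce a non-empty error description, falling back to the
# exception class name when str(exc) is empty (e.g. a bare ValueError()).
def describe_error(exc: Exception, context: str) -> str:
    return str(exc) if str(exc) else f"{context}: {type(exc).__name__}"


if __name__ == "__main__":
    print(describe_error(ValueError("bad header"), "File processing failed"))
    print(describe_error(ValueError(), "File processing failed"))  # falls back to class name
```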
letta/services/files_agents_manager.py
CHANGED
@@ -293,6 +293,66 @@ class FileAgentManager:
        else:
            return [r.to_pydantic() for r in rows]
 
+    @enforce_types
+    @trace_method
+    async def list_files_for_agent_paginated(
+        self,
+        agent_id: str,
+        actor: PydanticUser,
+        cursor: Optional[str] = None,
+        limit: int = 20,
+        is_open: Optional[bool] = None,
+    ) -> tuple[List[PydanticFileAgent], Optional[str], bool]:
+        """
+        Return paginated file associations for an agent.
+
+        Args:
+            agent_id: The agent ID to get files for
+            actor: User performing the action
+            cursor: Pagination cursor (file-agent ID to start after)
+            limit: Maximum number of results to return
+            is_open: Optional filter for open/closed status (None = all, True = open only, False = closed only)
+
+        Returns:
+            Tuple of (file_agents, next_cursor, has_more)
+        """
+        async with db_registry.async_session() as session:
+            conditions = [
+                FileAgentModel.agent_id == agent_id,
+                FileAgentModel.organization_id == actor.organization_id,
+                FileAgentModel.is_deleted == False,
+            ]
+
+            # apply is_open filter if specified
+            if is_open is not None:
+                conditions.append(FileAgentModel.is_open == is_open)
+
+            # apply cursor if provided (get records after this ID)
+            if cursor:
+                conditions.append(FileAgentModel.id > cursor)
+
+            query = select(FileAgentModel).where(and_(*conditions))
+
+            # order by ID for stable pagination
+            query = query.order_by(FileAgentModel.id)
+
+            # fetch limit + 1 to check if there are more results
+            query = query.limit(limit + 1)
+
+            result = await session.execute(query)
+            rows = result.scalars().all()
+
+            # check if we got more records than requested (meaning there are more pages)
+            has_more = len(rows) > limit
+            if has_more:
+                # trim back to the requested limit
+                rows = rows[:limit]
+
+            # get cursor for next page (ID of last item in current page)
+            next_cursor = rows[-1].id if rows else None
+
+            return [r.to_pydantic() for r in rows], next_cursor, has_more
+
     @enforce_types
     @trace_method
     async def list_agents_for_file(
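The new method uses keyset pagination: filter on `id > cursor`, order by `id`, and fetch `limit + 1` rows so the presence of an extra row signals another page. A small sketch of the same logic over a plain list (the SQLAlchemy query is replaced by list filtering for illustration):

```python
# Sketch: keyset pagination with the limit + 1 "peek ahead" trick.
from typing import Optional


def paginate(ids: list[str], cursor: Optional[str] = None, limit: int = 20) -> tuple[list[str], Optional[str], bool]:
    ordered = sorted(ids)                 # stand-in for ORDER BY id
    if cursor is not None:
        ordered = [i for i in ordered if i > cursor]  # stand-in for WHERE id > cursor
    rows = ordered[: limit + 1]           # fetch one extra row to peek ahead
    has_more = len(rows) > limit
    if has_more:
        rows = rows[:limit]               # trim back to the requested page size
    next_cursor = rows[-1] if rows else None
    return rows, next_cursor, has_more


if __name__ == "__main__":
    ids = [f"file-{n:03d}" for n in range(1, 8)]
    page, cursor, more = paginate(ids, limit=3)
    print(page, cursor, more)             # first page, has_more=True
    print(paginate(ids, cursor=cursor, limit=3))
```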
|