letta-nightly 0.8.17.dev20250723104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl
This diff compares the contents of two publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- letta/__init__.py +5 -3
- letta/agent.py +3 -2
- letta/agents/base_agent.py +4 -1
- letta/agents/voice_agent.py +1 -0
- letta/constants.py +4 -2
- letta/functions/schema_generator.py +2 -1
- letta/groups/dynamic_multi_agent.py +1 -0
- letta/helpers/converters.py +13 -5
- letta/helpers/json_helpers.py +6 -1
- letta/llm_api/anthropic.py +2 -2
- letta/llm_api/aws_bedrock.py +24 -94
- letta/llm_api/deepseek.py +1 -1
- letta/llm_api/google_ai_client.py +0 -38
- letta/llm_api/google_constants.py +6 -3
- letta/llm_api/helpers.py +1 -1
- letta/llm_api/llm_api_tools.py +4 -7
- letta/llm_api/mistral.py +12 -37
- letta/llm_api/openai.py +17 -17
- letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
- letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
- letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
- letta/local_llm/constants.py +2 -23
- letta/local_llm/json_parser.py +11 -1
- letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
- letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
- letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
- letta/local_llm/ollama/api.py +2 -2
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +33 -2
- letta/orm/files_agents.py +13 -10
- letta/orm/mixins.py +8 -0
- letta/orm/prompt.py +13 -0
- letta/orm/sqlite_functions.py +61 -17
- letta/otel/db_pool_monitoring.py +13 -12
- letta/schemas/agent.py +69 -4
- letta/schemas/agent_file.py +2 -0
- letta/schemas/block.py +11 -0
- letta/schemas/embedding_config.py +15 -3
- letta/schemas/enums.py +2 -0
- letta/schemas/file.py +1 -1
- letta/schemas/folder.py +74 -0
- letta/schemas/memory.py +12 -6
- letta/schemas/prompt.py +9 -0
- letta/schemas/providers/__init__.py +47 -0
- letta/schemas/providers/anthropic.py +78 -0
- letta/schemas/providers/azure.py +80 -0
- letta/schemas/providers/base.py +201 -0
- letta/schemas/providers/bedrock.py +78 -0
- letta/schemas/providers/cerebras.py +79 -0
- letta/schemas/providers/cohere.py +18 -0
- letta/schemas/providers/deepseek.py +63 -0
- letta/schemas/providers/google_gemini.py +102 -0
- letta/schemas/providers/google_vertex.py +54 -0
- letta/schemas/providers/groq.py +35 -0
- letta/schemas/providers/letta.py +39 -0
- letta/schemas/providers/lmstudio.py +97 -0
- letta/schemas/providers/mistral.py +41 -0
- letta/schemas/providers/ollama.py +151 -0
- letta/schemas/providers/openai.py +241 -0
- letta/schemas/providers/together.py +85 -0
- letta/schemas/providers/vllm.py +57 -0
- letta/schemas/providers/xai.py +66 -0
- letta/server/db.py +0 -5
- letta/server/rest_api/app.py +4 -3
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +152 -4
- letta/server/rest_api/routers/v1/folders.py +490 -0
- letta/server/rest_api/routers/v1/providers.py +2 -2
- letta/server/rest_api/routers/v1/sources.py +21 -26
- letta/server/rest_api/routers/v1/tools.py +90 -15
- letta/server/server.py +50 -95
- letta/services/agent_manager.py +420 -81
- letta/services/agent_serialization_manager.py +707 -0
- letta/services/block_manager.py +132 -11
- letta/services/file_manager.py +104 -29
- letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
- letta/services/file_processor/file_processor.py +75 -24
- letta/services/file_processor/parser/markitdown_parser.py +95 -0
- letta/services/files_agents_manager.py +57 -17
- letta/services/group_manager.py +7 -0
- letta/services/helpers/agent_manager_helper.py +25 -15
- letta/services/provider_manager.py +2 -2
- letta/services/source_manager.py +35 -16
- letta/services/tool_executor/files_tool_executor.py +12 -5
- letta/services/tool_manager.py +12 -0
- letta/services/tool_sandbox/e2b_sandbox.py +52 -48
- letta/settings.py +9 -6
- letta/streaming_utils.py +2 -1
- letta/utils.py +34 -1
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
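The largest structural changes in this release are a new letta/schemas/providers/ subpackage that splits provider definitions into one module per backend, a new folders API mirroring sources (letta/schemas/folder.py, routers/v1/folders.py), and a new letta/services/agent_serialization_manager.py that owns agent export/import; the full text of that last module is reproduced below. As a hedged sketch of the providers split (class names are guessed from the module names and the +47-line __init__.py, which suggests package-level re-exports; none of this is verified against the wheel), downstream imports would look like:

    # Assumed import paths after the providers split; names are illustrative.
    from letta.schemas.providers import AnthropicProvider, OpenAIProvider  # via package re-export
    from letta.schemas.providers.ollama import OllamaProvider  # direct per-backend module
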
letta/services/agent_serialization_manager.py (new file)
@@ -0,0 +1,707 @@
from datetime import datetime, timezone
from typing import Dict, List, Optional

from letta.errors import AgentFileExportError, AgentFileImportError
from letta.helpers.pinecone_utils import should_use_pinecone
from letta.log import get_logger
from letta.schemas.agent import AgentState, CreateAgent
from letta.schemas.agent_file import (
    AgentFileSchema,
    AgentSchema,
    BlockSchema,
    FileAgentSchema,
    FileSchema,
    GroupSchema,
    ImportResult,
    MessageSchema,
    SourceSchema,
    ToolSchema,
)
from letta.schemas.block import Block
from letta.schemas.enums import FileProcessingStatus
from letta.schemas.file import FileMetadata
from letta.schemas.message import Message
from letta.schemas.source import Source
from letta.schemas.tool import Tool
from letta.schemas.user import User
from letta.services.agent_manager import AgentManager
from letta.services.block_manager import BlockManager
from letta.services.file_manager import FileManager
from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
from letta.services.file_processor.embedder.pinecone_embedder import PineconeEmbedder
from letta.services.file_processor.file_processor import FileProcessor
from letta.services.file_processor.parser.markitdown_parser import MarkitdownFileParser
from letta.services.file_processor.parser.mistral_parser import MistralFileParser
from letta.services.files_agents_manager import FileAgentManager
from letta.services.group_manager import GroupManager
from letta.services.mcp_manager import MCPManager
from letta.services.message_manager import MessageManager
from letta.services.source_manager import SourceManager
from letta.services.tool_manager import ToolManager
from letta.settings import settings
from letta.utils import get_latest_alembic_revision

logger = get_logger(__name__)


class AgentSerializationManager:
    """
    Manages export and import of agent files between database and AgentFileSchema format.

    Handles:
    - ID mapping between database IDs and human-readable file IDs
    - Coordination across multiple entity managers
    - Transaction safety during imports
    - Referential integrity validation
    """

    def __init__(
        self,
        agent_manager: AgentManager,
        tool_manager: ToolManager,
        source_manager: SourceManager,
        block_manager: BlockManager,
        group_manager: GroupManager,
        mcp_manager: MCPManager,
        file_manager: FileManager,
        file_agent_manager: FileAgentManager,
        message_manager: MessageManager,
    ):
        self.agent_manager = agent_manager
        self.tool_manager = tool_manager
        self.source_manager = source_manager
        self.block_manager = block_manager
        self.group_manager = group_manager
        self.mcp_manager = mcp_manager
        self.file_manager = file_manager
        self.file_agent_manager = file_agent_manager
        self.message_manager = message_manager
        self.file_parser = MistralFileParser() if settings.mistral_api_key else MarkitdownFileParser()
        self.using_pinecone = should_use_pinecone()

        # ID mapping state for export
        self._db_to_file_ids: Dict[str, str] = {}

        # Counters for generating Stripe-style IDs
        self._id_counters: Dict[str, int] = {
            AgentSchema.__id_prefix__: 0,
            GroupSchema.__id_prefix__: 0,
            BlockSchema.__id_prefix__: 0,
            FileSchema.__id_prefix__: 0,
            SourceSchema.__id_prefix__: 0,
            ToolSchema.__id_prefix__: 0,
            MessageSchema.__id_prefix__: 0,
            FileAgentSchema.__id_prefix__: 0,
            # MCPServerSchema.__id_prefix__: 0,
        }

    def _reset_state(self):
        """Reset internal state for a new operation"""
        self._db_to_file_ids.clear()
        for key in self._id_counters:
            self._id_counters[key] = 0

    def _generate_file_id(self, entity_type: str) -> str:
        """Generate a Stripe-style ID for the given entity type"""
        counter = self._id_counters[entity_type]
        file_id = f"{entity_type}-{counter}"
        self._id_counters[entity_type] += 1
        return file_id

    def _map_db_to_file_id(self, db_id: str, entity_type: str, allow_new: bool = True) -> str:
        """Map a database UUID to a file ID, creating if needed (export only)"""
        if db_id in self._db_to_file_ids:
            return self._db_to_file_ids[db_id]

        if not allow_new:
            raise AgentFileExportError(
                f"Unexpected new {entity_type} ID '{db_id}' encountered during conversion. "
                f"All IDs should have been mapped during agent processing."
            )

        file_id = self._generate_file_id(entity_type)
        self._db_to_file_ids[db_id] = file_id
        return file_id

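    # Worked example of the mapping above (database IDs shortened and values
    # hypothetical, for illustration only):
    #
    #   _map_db_to_file_id("agent-3f9b...", AgentSchema.__id_prefix__)     -> "agent-0"
    #   _map_db_to_file_id("message-71c2...", MessageSchema.__id_prefix__) -> "message-0"
    #   _map_db_to_file_id("message-9d40...", MessageSchema.__id_prefix__) -> "message-1"
    #   _map_db_to_file_id("agent-3f9b...", AgentSchema.__id_prefix__)     -> "agent-0"  (cached)
    #
    # With allow_new=False, an ID that was not already mapped raises
    # AgentFileExportError; this is how dangling references (e.g. an
    # in-context message ID that was never exported) surface during export.
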
    def _extract_unique_tools(self, agent_states: List[AgentState]) -> List:
        """Extract unique tools across all agent states by ID"""
        all_tools = []
        for agent_state in agent_states:
            if agent_state.tools:
                all_tools.extend(agent_state.tools)

        unique_tools = {}
        for tool in all_tools:
            unique_tools[tool.id] = tool

        return sorted(unique_tools.values(), key=lambda x: x.name)

    def _extract_unique_blocks(self, agent_states: List[AgentState]) -> List:
        """Extract unique blocks across all agent states by ID"""
        all_blocks = []
        for agent_state in agent_states:
            if agent_state.memory and agent_state.memory.blocks:
                all_blocks.extend(agent_state.memory.blocks)

        unique_blocks = {}
        for block in all_blocks:
            unique_blocks[block.id] = block

        return sorted(unique_blocks.values(), key=lambda x: x.label)

    async def _extract_unique_sources_and_files_from_agents(
        self, agent_states: List[AgentState], actor: User, files_agents_cache: Optional[dict] = None
    ) -> tuple[List[Source], List[FileMetadata]]:
        """Extract unique sources and files from agent states using bulk operations"""
        all_source_ids = set()
        all_file_ids = set()

        for agent_state in agent_states:
            files_agents = await self.file_agent_manager.list_files_for_agent(
                agent_id=agent_state.id,
                actor=actor,
                is_open_only=False,
                return_as_blocks=False,
                per_file_view_window_char_limit=agent_state.per_file_view_window_char_limit,
            )
            # cache the results for reuse during conversion
            if files_agents_cache is not None:
                files_agents_cache[agent_state.id] = files_agents

            for file_agent in files_agents:
                all_source_ids.add(file_agent.source_id)
                all_file_ids.add(file_agent.file_id)
        sources = await self.source_manager.get_sources_by_ids_async(list(all_source_ids), actor)
        files = await self.file_manager.get_files_by_ids_async(list(all_file_ids), actor, include_content=True)

        return sources, files

    async def _convert_agent_state_to_schema(
        self, agent_state: AgentState, actor: User, files_agents_cache: Optional[dict] = None
    ) -> AgentSchema:
        """Convert AgentState to AgentSchema with ID remapping"""
        agent_file_id = self._map_db_to_file_id(agent_state.id, AgentSchema.__id_prefix__)

        # use cached file-agent data if available, otherwise fetch
        if files_agents_cache is not None and agent_state.id in files_agents_cache:
            files_agents = files_agents_cache[agent_state.id]
        else:
            files_agents = await self.file_agent_manager.list_files_for_agent(
                agent_id=agent_state.id,
                actor=actor,
                is_open_only=False,
                return_as_blocks=False,
                per_file_view_window_char_limit=agent_state.per_file_view_window_char_limit,
            )
        agent_schema = await AgentSchema.from_agent_state(
            agent_state, message_manager=self.message_manager, files_agents=files_agents, actor=actor
        )
        agent_schema.id = agent_file_id

        if agent_schema.messages:
            for message in agent_schema.messages:
                message_file_id = self._map_db_to_file_id(message.id, MessageSchema.__id_prefix__)
                message.id = message_file_id
                message.agent_id = agent_file_id

        if agent_schema.in_context_message_ids:
            agent_schema.in_context_message_ids = [
                self._map_db_to_file_id(message_id, MessageSchema.__id_prefix__, allow_new=False)
                for message_id in agent_schema.in_context_message_ids
            ]

        if agent_schema.tool_ids:
            agent_schema.tool_ids = [self._map_db_to_file_id(tool_id, ToolSchema.__id_prefix__) for tool_id in agent_schema.tool_ids]

        if agent_schema.source_ids:
            agent_schema.source_ids = [
                self._map_db_to_file_id(source_id, SourceSchema.__id_prefix__) for source_id in agent_schema.source_ids
            ]

        if agent_schema.block_ids:
            agent_schema.block_ids = [self._map_db_to_file_id(block_id, BlockSchema.__id_prefix__) for block_id in agent_schema.block_ids]

        if agent_schema.files_agents:
            for file_agent in agent_schema.files_agents:
                file_agent.file_id = self._map_db_to_file_id(file_agent.file_id, FileSchema.__id_prefix__)
                file_agent.source_id = self._map_db_to_file_id(file_agent.source_id, SourceSchema.__id_prefix__)
                file_agent.agent_id = agent_file_id

        return agent_schema

    def _convert_tool_to_schema(self, tool) -> ToolSchema:
        """Convert Tool to ToolSchema with ID remapping"""
        tool_file_id = self._map_db_to_file_id(tool.id, ToolSchema.__id_prefix__, allow_new=False)
        tool_schema = ToolSchema.from_tool(tool)
        tool_schema.id = tool_file_id
        return tool_schema

    def _convert_block_to_schema(self, block) -> BlockSchema:
        """Convert Block to BlockSchema with ID remapping"""
        block_file_id = self._map_db_to_file_id(block.id, BlockSchema.__id_prefix__, allow_new=False)
        block_schema = BlockSchema.from_block(block)
        block_schema.id = block_file_id
        return block_schema

    def _convert_source_to_schema(self, source) -> SourceSchema:
        """Convert Source to SourceSchema with ID remapping"""
        source_file_id = self._map_db_to_file_id(source.id, SourceSchema.__id_prefix__, allow_new=False)
        source_schema = SourceSchema.from_source(source)
        source_schema.id = source_file_id
        return source_schema

    def _convert_file_to_schema(self, file_metadata) -> FileSchema:
        """Convert FileMetadata to FileSchema with ID remapping"""
        file_file_id = self._map_db_to_file_id(file_metadata.id, FileSchema.__id_prefix__, allow_new=False)
        file_schema = FileSchema.from_file_metadata(file_metadata)
        file_schema.id = file_file_id
        file_schema.source_id = self._map_db_to_file_id(file_metadata.source_id, SourceSchema.__id_prefix__, allow_new=False)
        return file_schema

    async def export(self, agent_ids: List[str], actor: User) -> AgentFileSchema:
        """
        Export agents and their related entities to AgentFileSchema format.

        Args:
            agent_ids: List of agent UUIDs to export

        Returns:
            AgentFileSchema with all related entities

        Raises:
            AgentFileExportError: If export fails
        """
        try:
            self._reset_state()

            agent_states = await self.agent_manager.get_agents_by_ids_async(agent_ids=agent_ids, actor=actor)

            # Validate that all requested agents were found
            if len(agent_states) != len(agent_ids):
                found_ids = {agent.id for agent in agent_states}
                missing_ids = [agent_id for agent_id in agent_ids if agent_id not in found_ids]
                raise AgentFileExportError(f"The following agent IDs were not found: {missing_ids}")

            # cache for file-agent relationships to avoid duplicate queries
            files_agents_cache = {}  # Maps agent_id to list of file_agent relationships

            # Extract unique entities across all agents
            tool_set = self._extract_unique_tools(agent_states)
            block_set = self._extract_unique_blocks(agent_states)

            # Extract sources and files from agent states BEFORE conversion (with caching)
            source_set, file_set = await self._extract_unique_sources_and_files_from_agents(agent_states, actor, files_agents_cache)

            # Convert to schemas with ID remapping (reusing cached file-agent data)
            agent_schemas = [
                await self._convert_agent_state_to_schema(agent_state, actor=actor, files_agents_cache=files_agents_cache)
                for agent_state in agent_states
            ]
            tool_schemas = [self._convert_tool_to_schema(tool) for tool in tool_set]
            block_schemas = [self._convert_block_to_schema(block) for block in block_set]
            source_schemas = [self._convert_source_to_schema(source) for source in source_set]
            file_schemas = [self._convert_file_to_schema(file_metadata) for file_metadata in file_set]

            logger.info(f"Exporting {len(agent_ids)} agents to agent file format")

            # Return AgentFileSchema with converted entities
            return AgentFileSchema(
                agents=agent_schemas,
                groups=[],  # TODO: Extract and convert groups
                blocks=block_schemas,
                files=file_schemas,
                sources=source_schemas,
                tools=tool_schemas,
                # mcp_servers=[],  # TODO: Extract and convert MCP servers
                metadata={"revision_id": await get_latest_alembic_revision()},
                created_at=datetime.now(timezone.utc),
            )

        except Exception as e:
            logger.error(f"Failed to export agent file: {e}")
            raise AgentFileExportError(f"Export failed: {e}") from e

    async def import_file(self, schema: AgentFileSchema, actor: User, dry_run: bool = False) -> ImportResult:
        """
        Import AgentFileSchema into the database.

        Args:
            schema: The agent file schema to import
            dry_run: If True, validate but don't commit changes

        Returns:
            ImportResult with success status and details

        Raises:
            AgentFileImportError: If import fails
        """
        try:
            self._reset_state()

            if dry_run:
                logger.info("Starting dry run import validation")
            else:
                logger.info("Starting agent file import")

            # Validate schema first
            self._validate_schema(schema)

            if dry_run:
                return ImportResult(
                    success=True,
                    message="Dry run validation passed",
                    imported_count=0,
                )

            # Import in dependency order
            imported_count = 0
            file_to_db_ids = {}  # Maps file IDs to new database IDs
            # in-memory cache for file metadata to avoid repeated db calls
            file_metadata_cache = {}  # Maps database file ID to FileMetadata

            # 1. Create tools first (no dependencies) - using bulk upsert for efficiency
            if schema.tools:
                # convert tool schemas to pydantic tools
                pydantic_tools = []
                for tool_schema in schema.tools:
                    pydantic_tools.append(Tool(**tool_schema.model_dump(exclude={"id"})))

                # bulk upsert all tools at once
                created_tools = await self.tool_manager.bulk_upsert_tools_async(pydantic_tools, actor)

                # map file ids to database ids
                # note: tools are matched by name during upsert, so we need to match by name here too
                created_tools_by_name = {tool.name: tool for tool in created_tools}
                for tool_schema in schema.tools:
                    created_tool = created_tools_by_name.get(tool_schema.name)
                    if created_tool:
                        file_to_db_ids[tool_schema.id] = created_tool.id
                        imported_count += 1
                    else:
                        logger.warning(f"Tool {tool_schema.name} was not created during bulk upsert")

            # 2. Create blocks (no dependencies) - using batch create for efficiency
            if schema.blocks:
                # convert block schemas to pydantic blocks (excluding IDs to create new blocks)
                pydantic_blocks = []
                for block_schema in schema.blocks:
                    pydantic_blocks.append(Block(**block_schema.model_dump(exclude={"id"})))

                # batch create all blocks at once
                created_blocks = await self.block_manager.batch_create_blocks_async(pydantic_blocks, actor)

                # map file ids to database ids
                for block_schema, created_block in zip(schema.blocks, created_blocks):
                    file_to_db_ids[block_schema.id] = created_block.id
                    imported_count += 1

            # 3. Create sources (no dependencies) - using bulk upsert for efficiency
            if schema.sources:
                # convert source schemas to pydantic sources
                pydantic_sources = []
                for source_schema in schema.sources:
                    source_data = source_schema.model_dump(exclude={"id", "embedding", "embedding_chunk_size"})
                    pydantic_sources.append(Source(**source_data))

                # bulk upsert all sources at once
                created_sources = await self.source_manager.bulk_upsert_sources_async(pydantic_sources, actor)

                # map file ids to database ids
                # note: sources are matched by name during upsert, so we need to match by name here too
                created_sources_by_name = {source.name: source for source in created_sources}
                for source_schema in schema.sources:
                    created_source = created_sources_by_name.get(source_schema.name)
                    if created_source:
                        file_to_db_ids[source_schema.id] = created_source.id
                        imported_count += 1
                    else:
                        logger.warning(f"Source {source_schema.name} was not created during bulk upsert")

            # 4. Create files (depends on sources)
            for file_schema in schema.files:
                # Convert FileSchema back to FileMetadata
                file_data = file_schema.model_dump(exclude={"id", "content"})
                # Remap source_id from file ID to database ID
                file_data["source_id"] = file_to_db_ids[file_schema.source_id]
                # Set processing status to PARSING since we have parsed content but need to re-embed
                file_data["processing_status"] = FileProcessingStatus.PARSING
                file_data["error_message"] = None
                file_data["total_chunks"] = None
                file_data["chunks_embedded"] = None
                file_metadata = FileMetadata(**file_data)
                created_file = await self.file_manager.create_file(file_metadata, actor, text=file_schema.content)
                file_to_db_ids[file_schema.id] = created_file.id
                imported_count += 1

            # 5. Process files for chunking/embedding (depends on files and sources)
            if should_use_pinecone():
                embedder = PineconeEmbedder(embedding_config=schema.agents[0].embedding_config)
            else:
                embedder = OpenAIEmbedder(embedding_config=schema.agents[0].embedding_config)
            file_processor = FileProcessor(
                file_parser=self.file_parser,
                embedder=embedder,
                actor=actor,
                using_pinecone=self.using_pinecone,
            )

            for file_schema in schema.files:
                if file_schema.content:  # Only process files with content
                    file_db_id = file_to_db_ids[file_schema.id]
                    source_db_id = file_to_db_ids[file_schema.source_id]

                    # Get the created file metadata (with caching)
                    if file_db_id not in file_metadata_cache:
                        file_metadata_cache[file_db_id] = await self.file_manager.get_file_by_id(file_db_id, actor)
                    file_metadata = file_metadata_cache[file_db_id]

                    # Save the db call of fetching content again
                    file_metadata.content = file_schema.content

                    # Process the file for chunking/embedding
                    passages = await file_processor.process_imported_file(file_metadata=file_metadata, source_id=source_db_id)
                    imported_count += len(passages)

            # 6. Create agents with empty message history
            for agent_schema in schema.agents:
                # Convert AgentSchema back to CreateAgent, remapping tool/block IDs
                agent_data = agent_schema.model_dump(exclude={"id", "in_context_message_ids", "messages"})

                # Remap tool_ids from file IDs to database IDs
                if agent_data.get("tool_ids"):
                    agent_data["tool_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["tool_ids"]]

                # Remap block_ids from file IDs to database IDs
                if agent_data.get("block_ids"):
                    agent_data["block_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["block_ids"]]

                agent_create = CreateAgent(**agent_data)
                created_agent = await self.agent_manager.create_agent_async(agent_create, actor, _init_with_no_messages=True)
                file_to_db_ids[agent_schema.id] = created_agent.id
                imported_count += 1

            # 7. Create messages and update agent message_ids
            for agent_schema in schema.agents:
                agent_db_id = file_to_db_ids[agent_schema.id]
                message_file_to_db_ids = {}

                # Create messages for this agent
                messages = []
                for message_schema in agent_schema.messages:
                    # Convert MessageSchema back to Message, setting agent_id to new DB ID
                    message_data = message_schema.model_dump(exclude={"id"})
                    message_data["agent_id"] = agent_db_id  # Remap agent_id to new database ID
                    message_obj = Message(**message_data)
                    messages.append(message_obj)
                    # Map file ID to the generated database ID immediately
                    message_file_to_db_ids[message_schema.id] = message_obj.id

                created_messages = await self.message_manager.create_many_messages_async(pydantic_msgs=messages, actor=actor)
                imported_count += len(created_messages)

                # Remap in_context_message_ids from file IDs to database IDs
                in_context_db_ids = [message_file_to_db_ids[message_schema_id] for message_schema_id in agent_schema.in_context_message_ids]

                # Update agent with the correct message_ids
                await self.agent_manager.update_message_ids_async(agent_id=agent_db_id, message_ids=in_context_db_ids, actor=actor)

            # 8. Create file-agent relationships (depends on agents and files)
            for agent_schema in schema.agents:
                if agent_schema.files_agents:
                    agent_db_id = file_to_db_ids[agent_schema.id]

                    # Prepare files for bulk attachment
                    files_for_agent = []
                    visible_content_map = {}

                    for file_agent_schema in agent_schema.files_agents:
                        file_db_id = file_to_db_ids[file_agent_schema.file_id]

                        # Use cached file metadata if available
                        if file_db_id not in file_metadata_cache:
                            file_metadata_cache[file_db_id] = await self.file_manager.get_file_by_id(file_db_id, actor)
                        file_metadata = file_metadata_cache[file_db_id]
                        files_for_agent.append(file_metadata)

                        if file_agent_schema.visible_content:
                            visible_content_map[file_db_id] = file_agent_schema.visible_content

                    # Bulk attach files to agent
                    await self.file_agent_manager.attach_files_bulk(
                        agent_id=agent_db_id,
                        files_metadata=files_for_agent,
                        visible_content_map=visible_content_map,
                        actor=actor,
                        max_files_open=agent_schema.max_files_open,
                    )
                    imported_count += len(files_for_agent)

            return ImportResult(
                success=True,
                message=f"Import completed successfully. Imported {imported_count} entities.",
                imported_count=imported_count,
                id_mappings=file_to_db_ids,
            )

        except Exception as e:
            logger.exception(f"Failed to import agent file: {e}")
            raise AgentFileImportError(f"Import failed: {e}") from e

    def _validate_id_format(self, schema: AgentFileSchema) -> List[str]:
        """Validate that all IDs follow the expected format"""
        errors = []

        # Define entity types and their expected prefixes
        entity_checks = [
            (schema.agents, AgentSchema.__id_prefix__),
            (schema.groups, GroupSchema.__id_prefix__),
            (schema.blocks, BlockSchema.__id_prefix__),
            (schema.files, FileSchema.__id_prefix__),
            (schema.sources, SourceSchema.__id_prefix__),
            (schema.tools, ToolSchema.__id_prefix__),
        ]

        for entities, expected_prefix in entity_checks:
            for entity in entities:
                if not entity.id.startswith(f"{expected_prefix}-"):
                    errors.append(f"Invalid ID format: {entity.id} should start with '{expected_prefix}-'")
                else:
                    # Check that the suffix is a valid integer
                    try:
                        suffix = entity.id[len(expected_prefix) + 1 :]
                        int(suffix)
                    except ValueError:
                        errors.append(f"Invalid ID format: {entity.id} should have integer suffix")

        # Also check message IDs within agents
        for agent in schema.agents:
            for message in agent.messages:
                if not message.id.startswith(f"{MessageSchema.__id_prefix__}-"):
                    errors.append(f"Invalid message ID format: {message.id} should start with '{MessageSchema.__id_prefix__}-'")
                else:
                    # Check that the suffix is a valid integer
                    try:
                        suffix = message.id[len(MessageSchema.__id_prefix__) + 1 :]
                        int(suffix)
                    except ValueError:
                        errors.append(f"Invalid message ID format: {message.id} should have integer suffix")

        return errors

    def _validate_duplicate_ids(self, schema: AgentFileSchema) -> List[str]:
        """Validate that there are no duplicate IDs within or across entity types"""
        errors = []
        all_ids = set()

        # Check each entity type for internal duplicates and collect all IDs
        entity_collections = [
            ("agents", schema.agents),
            ("groups", schema.groups),
            ("blocks", schema.blocks),
            ("files", schema.files),
            ("sources", schema.sources),
            ("tools", schema.tools),
        ]

        for entity_type, entities in entity_collections:
            entity_ids = [entity.id for entity in entities]

            # Check for duplicates within this entity type
            seen = set()
            duplicates = set()
            for entity_id in entity_ids:
                if entity_id in seen:
                    duplicates.add(entity_id)
                else:
                    seen.add(entity_id)

            if duplicates:
                errors.append(f"Duplicate {entity_type} IDs found: {duplicates}")

            # Check for duplicates across all entity types
            for entity_id in entity_ids:
                if entity_id in all_ids:
                    errors.append(f"Duplicate ID across entity types: {entity_id}")
                all_ids.add(entity_id)

        # Also check message IDs within agents
        for agent in schema.agents:
            message_ids = [msg.id for msg in agent.messages]

            # Check for duplicates within agent messages
            seen = set()
            duplicates = set()
            for message_id in message_ids:
                if message_id in seen:
                    duplicates.add(message_id)
                else:
                    seen.add(message_id)

            if duplicates:
                errors.append(f"Duplicate message IDs in agent {agent.id}: {duplicates}")

            # Check for duplicates across all entity types
            for message_id in message_ids:
                if message_id in all_ids:
                    errors.append(f"Duplicate ID across entity types: {message_id}")
                all_ids.add(message_id)

        return errors

    def _validate_file_source_references(self, schema: AgentFileSchema) -> List[str]:
        """Validate that all file source_id references exist"""
        errors = []
        source_ids = {source.id for source in schema.sources}

        for file in schema.files:
            if file.source_id not in source_ids:
                errors.append(f"File {file.id} references non-existent source {file.source_id}")

        return errors

    def _validate_file_agent_references(self, schema: AgentFileSchema) -> List[str]:
        """Validate that all file-agent relationships reference existing entities"""
        errors = []
        file_ids = {file.id for file in schema.files}
        source_ids = {source.id for source in schema.sources}
        # agent membership is checked per-agent below via file_agent.agent_id

        for agent in schema.agents:
            for file_agent in agent.files_agents:
                if file_agent.file_id not in file_ids:
                    errors.append(f"File-agent relationship references non-existent file {file_agent.file_id}")
                if file_agent.source_id not in source_ids:
                    errors.append(f"File-agent relationship references non-existent source {file_agent.source_id}")
                if file_agent.agent_id != agent.id:
                    errors.append(f"File-agent relationship has mismatched agent_id {file_agent.agent_id} vs {agent.id}")

        return errors

    def _validate_schema(self, schema: AgentFileSchema):
        """
        Validate the agent file schema for consistency and referential integrity.

        Args:
            schema: The schema to validate

        Raises:
            AgentFileImportError: If validation fails
        """
        errors = []

        # 1. ID Format Validation
        errors.extend(self._validate_id_format(schema))

        # 2. Duplicate ID Detection
        errors.extend(self._validate_duplicate_ids(schema))

        # 3. File Source Reference Validation
        errors.extend(self._validate_file_source_references(schema))

        # 4. File-Agent Reference Validation
        errors.extend(self._validate_file_agent_references(schema))

        if errors:
            raise AgentFileImportError(f"Schema validation failed: {'; '.join(errors)}")

        logger.info("Schema validation passed")
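
Taken together, export() and import_file() form a round-trip: export remaps database IDs to deterministic file IDs ("agent-0", "tool-3", ...), and import replays the entities in dependency order (tools, blocks, sources, files, agents, messages, file-agent links) while building the reverse mapping. A minimal driving sketch, assuming a fully wired AgentSerializationManager and a User actor are already in hand and that AgentFileSchema serializes via pydantic v2's model_dump_json (the method calls and result fields match the code above; the surrounding setup is illustrative only):

    async def round_trip(manager: "AgentSerializationManager", agent_ids: list, actor: "User"):
        # Export the agents plus their tools, blocks, sources, files, and messages
        agent_file = await manager.export(agent_ids=agent_ids, actor=actor)

        # AgentFileSchema is a pydantic model, so it can travel as JSON
        payload = agent_file.model_dump_json()

        # Validate first without writing anything (dry_run returns right after
        # _validate_schema), then import for real
        dry = await manager.import_file(agent_file, actor=actor, dry_run=True)
        assert dry.success

        result = await manager.import_file(agent_file, actor=actor)
        # result.id_mappings relates file IDs to the newly created database IDs
        return payload, result.id_mappings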