letta-nightly 0.8.17.dev20250723104501__py3-none-any.whl → 0.9.0.dev20250724104456__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +5 -3
  2. letta/agent.py +3 -2
  3. letta/agents/base_agent.py +4 -1
  4. letta/agents/voice_agent.py +1 -0
  5. letta/constants.py +4 -2
  6. letta/functions/schema_generator.py +2 -1
  7. letta/groups/dynamic_multi_agent.py +1 -0
  8. letta/helpers/converters.py +13 -5
  9. letta/helpers/json_helpers.py +6 -1
  10. letta/llm_api/anthropic.py +2 -2
  11. letta/llm_api/aws_bedrock.py +24 -94
  12. letta/llm_api/deepseek.py +1 -1
  13. letta/llm_api/google_ai_client.py +0 -38
  14. letta/llm_api/google_constants.py +6 -3
  15. letta/llm_api/helpers.py +1 -1
  16. letta/llm_api/llm_api_tools.py +4 -7
  17. letta/llm_api/mistral.py +12 -37
  18. letta/llm_api/openai.py +17 -17
  19. letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
  20. letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
  21. letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
  22. letta/local_llm/constants.py +2 -23
  23. letta/local_llm/json_parser.py +11 -1
  24. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
  25. letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
  26. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
  27. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
  28. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
  29. letta/local_llm/ollama/api.py +2 -2
  30. letta/orm/__init__.py +1 -0
  31. letta/orm/agent.py +33 -2
  32. letta/orm/files_agents.py +13 -10
  33. letta/orm/mixins.py +8 -0
  34. letta/orm/prompt.py +13 -0
  35. letta/orm/sqlite_functions.py +61 -17
  36. letta/otel/db_pool_monitoring.py +13 -12
  37. letta/schemas/agent.py +69 -4
  38. letta/schemas/agent_file.py +2 -0
  39. letta/schemas/block.py +11 -0
  40. letta/schemas/embedding_config.py +15 -3
  41. letta/schemas/enums.py +2 -0
  42. letta/schemas/file.py +1 -1
  43. letta/schemas/folder.py +74 -0
  44. letta/schemas/memory.py +12 -6
  45. letta/schemas/prompt.py +9 -0
  46. letta/schemas/providers/__init__.py +47 -0
  47. letta/schemas/providers/anthropic.py +78 -0
  48. letta/schemas/providers/azure.py +80 -0
  49. letta/schemas/providers/base.py +201 -0
  50. letta/schemas/providers/bedrock.py +78 -0
  51. letta/schemas/providers/cerebras.py +79 -0
  52. letta/schemas/providers/cohere.py +18 -0
  53. letta/schemas/providers/deepseek.py +63 -0
  54. letta/schemas/providers/google_gemini.py +102 -0
  55. letta/schemas/providers/google_vertex.py +54 -0
  56. letta/schemas/providers/groq.py +35 -0
  57. letta/schemas/providers/letta.py +39 -0
  58. letta/schemas/providers/lmstudio.py +97 -0
  59. letta/schemas/providers/mistral.py +41 -0
  60. letta/schemas/providers/ollama.py +151 -0
  61. letta/schemas/providers/openai.py +241 -0
  62. letta/schemas/providers/together.py +85 -0
  63. letta/schemas/providers/vllm.py +57 -0
  64. letta/schemas/providers/xai.py +66 -0
  65. letta/server/db.py +0 -5
  66. letta/server/rest_api/app.py +4 -3
  67. letta/server/rest_api/routers/v1/__init__.py +2 -0
  68. letta/server/rest_api/routers/v1/agents.py +152 -4
  69. letta/server/rest_api/routers/v1/folders.py +490 -0
  70. letta/server/rest_api/routers/v1/providers.py +2 -2
  71. letta/server/rest_api/routers/v1/sources.py +21 -26
  72. letta/server/rest_api/routers/v1/tools.py +90 -15
  73. letta/server/server.py +50 -95
  74. letta/services/agent_manager.py +420 -81
  75. letta/services/agent_serialization_manager.py +707 -0
  76. letta/services/block_manager.py +132 -11
  77. letta/services/file_manager.py +104 -29
  78. letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
  79. letta/services/file_processor/file_processor.py +75 -24
  80. letta/services/file_processor/parser/markitdown_parser.py +95 -0
  81. letta/services/files_agents_manager.py +57 -17
  82. letta/services/group_manager.py +7 -0
  83. letta/services/helpers/agent_manager_helper.py +25 -15
  84. letta/services/provider_manager.py +2 -2
  85. letta/services/source_manager.py +35 -16
  86. letta/services/tool_executor/files_tool_executor.py +12 -5
  87. letta/services/tool_manager.py +12 -0
  88. letta/services/tool_sandbox/e2b_sandbox.py +52 -48
  89. letta/settings.py +9 -6
  90. letta/streaming_utils.py +2 -1
  91. letta/utils.py +34 -1
  92. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724104456.dist-info}/METADATA +9 -8
  93. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724104456.dist-info}/RECORD +96 -68
  94. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724104456.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724104456.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724104456.dist-info}/entry_points.txt +0 -0
letta/services/agent_serialization_manager.py (new file)
@@ -0,0 +1,707 @@
+ from datetime import datetime, timezone
+ from typing import Dict, List
+
+ from letta.errors import AgentFileExportError, AgentFileImportError
+ from letta.helpers.pinecone_utils import should_use_pinecone
+ from letta.log import get_logger
+ from letta.schemas.agent import AgentState, CreateAgent
+ from letta.schemas.agent_file import (
+     AgentFileSchema,
+     AgentSchema,
+     BlockSchema,
+     FileAgentSchema,
+     FileSchema,
+     GroupSchema,
+     ImportResult,
+     MessageSchema,
+     SourceSchema,
+     ToolSchema,
+ )
+ from letta.schemas.block import Block
+ from letta.schemas.enums import FileProcessingStatus
+ from letta.schemas.file import FileMetadata
+ from letta.schemas.message import Message
+ from letta.schemas.source import Source
+ from letta.schemas.tool import Tool
+ from letta.schemas.user import User
+ from letta.services.agent_manager import AgentManager
+ from letta.services.block_manager import BlockManager
+ from letta.services.file_manager import FileManager
+ from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
+ from letta.services.file_processor.embedder.pinecone_embedder import PineconeEmbedder
+ from letta.services.file_processor.file_processor import FileProcessor
+ from letta.services.file_processor.parser.markitdown_parser import MarkitdownFileParser
+ from letta.services.file_processor.parser.mistral_parser import MistralFileParser
+ from letta.services.files_agents_manager import FileAgentManager
+ from letta.services.group_manager import GroupManager
+ from letta.services.mcp_manager import MCPManager
+ from letta.services.message_manager import MessageManager
+ from letta.services.source_manager import SourceManager
+ from letta.services.tool_manager import ToolManager
+ from letta.settings import settings
+ from letta.utils import get_latest_alembic_revision
+
+ logger = get_logger(__name__)
+
+
+ class AgentSerializationManager:
+     """
+     Manages export and import of agent files between database and AgentFileSchema format.
+
+     Handles:
+     - ID mapping between database IDs and human-readable file IDs
+     - Coordination across multiple entity managers
+     - Transaction safety during imports
+     - Referential integrity validation
+     """
+
+     def __init__(
+         self,
+         agent_manager: AgentManager,
+         tool_manager: ToolManager,
+         source_manager: SourceManager,
+         block_manager: BlockManager,
+         group_manager: GroupManager,
+         mcp_manager: MCPManager,
+         file_manager: FileManager,
+         file_agent_manager: FileAgentManager,
+         message_manager: MessageManager,
+     ):
+         self.agent_manager = agent_manager
+         self.tool_manager = tool_manager
+         self.source_manager = source_manager
+         self.block_manager = block_manager
+         self.group_manager = group_manager
+         self.mcp_manager = mcp_manager
+         self.file_manager = file_manager
+         self.file_agent_manager = file_agent_manager
+         self.message_manager = message_manager
+         self.file_parser = MistralFileParser() if settings.mistral_api_key else MarkitdownFileParser()
+         self.using_pinecone = should_use_pinecone()
+
+         # ID mapping state for export
+         self._db_to_file_ids: Dict[str, str] = {}
+
+         # Counters for generating Stripe-style IDs
+         self._id_counters: Dict[str, int] = {
+             AgentSchema.__id_prefix__: 0,
+             GroupSchema.__id_prefix__: 0,
+             BlockSchema.__id_prefix__: 0,
+             FileSchema.__id_prefix__: 0,
+             SourceSchema.__id_prefix__: 0,
+             ToolSchema.__id_prefix__: 0,
+             MessageSchema.__id_prefix__: 0,
+             FileAgentSchema.__id_prefix__: 0,
+             # MCPServerSchema.__id_prefix__: 0,
+         }
+
+     def _reset_state(self):
+         """Reset internal state for a new operation"""
+         self._db_to_file_ids.clear()
+         for key in self._id_counters:
+             self._id_counters[key] = 0
+
+     def _generate_file_id(self, entity_type: str) -> str:
+         """Generate a Stripe-style ID for the given entity type"""
+         counter = self._id_counters[entity_type]
+         file_id = f"{entity_type}-{counter}"
+         self._id_counters[entity_type] += 1
+         return file_id
+
+     def _map_db_to_file_id(self, db_id: str, entity_type: str, allow_new: bool = True) -> str:
+         """Map a database UUID to a file ID, creating if needed (export only)"""
+         if db_id in self._db_to_file_ids:
+             return self._db_to_file_ids[db_id]
+
+         if not allow_new:
+             raise AgentFileExportError(
+                 f"Unexpected new {entity_type} ID '{db_id}' encountered during conversion. "
+                 f"All IDs should have been mapped during agent processing."
+             )
+
+         file_id = self._generate_file_id(entity_type)
+         self._db_to_file_ids[db_id] = file_id
+         return file_id
+
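A minimal sketch (not part of the diff) of how the counter-based ID mapping above behaves. It assumes AgentSchema.__id_prefix__ is "agent" and BlockSchema.__id_prefix__ is "block", and elides the manager wiring; the database IDs are made up:

    mgr._map_db_to_file_id("agent-3f2a", AgentSchema.__id_prefix__)  # -> "agent-0"
    mgr._map_db_to_file_id("agent-9c1b", AgentSchema.__id_prefix__)  # -> "agent-1"
    mgr._map_db_to_file_id("agent-3f2a", AgentSchema.__id_prefix__)  # -> "agent-0" (already mapped)
    # allow_new=False guards conversion passes that expect every ID to be known already:
    mgr._map_db_to_file_id("block-7aa0", BlockSchema.__id_prefix__, allow_new=False)  # raises AgentFileExportError

Each prefix keeps its own counter, so for a given traversal order the exported IDs are deterministic.
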
+     def _extract_unique_tools(self, agent_states: List[AgentState]) -> List:
+         """Extract unique tools across all agent states by ID"""
+         all_tools = []
+         for agent_state in agent_states:
+             if agent_state.tools:
+                 all_tools.extend(agent_state.tools)
+
+         unique_tools = {}
+         for tool in all_tools:
+             unique_tools[tool.id] = tool
+
+         return sorted(unique_tools.values(), key=lambda x: x.name)
+
+     def _extract_unique_blocks(self, agent_states: List[AgentState]) -> List:
+         """Extract unique blocks across all agent states by ID"""
+         all_blocks = []
+         for agent_state in agent_states:
+             if agent_state.memory and agent_state.memory.blocks:
+                 all_blocks.extend(agent_state.memory.blocks)
+
+         unique_blocks = {}
+         for block in all_blocks:
+             unique_blocks[block.id] = block
+
+         return sorted(unique_blocks.values(), key=lambda x: x.label)
+
+     async def _extract_unique_sources_and_files_from_agents(
+         self, agent_states: List[AgentState], actor: User, files_agents_cache: dict = None
+     ) -> tuple[List[Source], List[FileMetadata]]:
+         """Extract unique sources and files from agent states using bulk operations"""
+
+         all_source_ids = set()
+         all_file_ids = set()
+
+         for agent_state in agent_states:
+             files_agents = await self.file_agent_manager.list_files_for_agent(
+                 agent_id=agent_state.id,
+                 actor=actor,
+                 is_open_only=False,
+                 return_as_blocks=False,
+                 per_file_view_window_char_limit=agent_state.per_file_view_window_char_limit,
+             )
+             # cache the results for reuse during conversion
+             if files_agents_cache is not None:
+                 files_agents_cache[agent_state.id] = files_agents
+
+             for file_agent in files_agents:
+                 all_source_ids.add(file_agent.source_id)
+                 all_file_ids.add(file_agent.file_id)
+         sources = await self.source_manager.get_sources_by_ids_async(list(all_source_ids), actor)
+         files = await self.file_manager.get_files_by_ids_async(list(all_file_ids), actor, include_content=True)
+
+         return sources, files
+
+     async def _convert_agent_state_to_schema(self, agent_state: AgentState, actor: User, files_agents_cache: dict = None) -> AgentSchema:
+         """Convert AgentState to AgentSchema with ID remapping"""
+
+         agent_file_id = self._map_db_to_file_id(agent_state.id, AgentSchema.__id_prefix__)
+
+         # use cached file-agent data if available, otherwise fetch
+         if files_agents_cache is not None and agent_state.id in files_agents_cache:
+             files_agents = files_agents_cache[agent_state.id]
+         else:
+             files_agents = await self.file_agent_manager.list_files_for_agent(
+                 agent_id=agent_state.id,
+                 actor=actor,
+                 is_open_only=False,
+                 return_as_blocks=False,
+                 per_file_view_window_char_limit=agent_state.per_file_view_window_char_limit,
+             )
+         agent_schema = await AgentSchema.from_agent_state(
+             agent_state, message_manager=self.message_manager, files_agents=files_agents, actor=actor
+         )
+         agent_schema.id = agent_file_id
+
+         if agent_schema.messages:
+             for message in agent_schema.messages:
+                 message_file_id = self._map_db_to_file_id(message.id, MessageSchema.__id_prefix__)
+                 message.id = message_file_id
+                 message.agent_id = agent_file_id
+
+         if agent_schema.in_context_message_ids:
+             agent_schema.in_context_message_ids = [
+                 self._map_db_to_file_id(message_id, MessageSchema.__id_prefix__, allow_new=False)
+                 for message_id in agent_schema.in_context_message_ids
+             ]
+
+         if agent_schema.tool_ids:
+             agent_schema.tool_ids = [self._map_db_to_file_id(tool_id, ToolSchema.__id_prefix__) for tool_id in agent_schema.tool_ids]
+
+         if agent_schema.source_ids:
+             agent_schema.source_ids = [
+                 self._map_db_to_file_id(source_id, SourceSchema.__id_prefix__) for source_id in agent_schema.source_ids
+             ]
+
+         if agent_schema.block_ids:
+             agent_schema.block_ids = [self._map_db_to_file_id(block_id, BlockSchema.__id_prefix__) for block_id in agent_schema.block_ids]
+
+         if agent_schema.files_agents:
+             for file_agent in agent_schema.files_agents:
+                 file_agent.file_id = self._map_db_to_file_id(file_agent.file_id, FileSchema.__id_prefix__)
+                 file_agent.source_id = self._map_db_to_file_id(file_agent.source_id, SourceSchema.__id_prefix__)
+                 file_agent.agent_id = agent_file_id
+
+         return agent_schema
+
+     def _convert_tool_to_schema(self, tool) -> ToolSchema:
+         """Convert Tool to ToolSchema with ID remapping"""
+         tool_file_id = self._map_db_to_file_id(tool.id, ToolSchema.__id_prefix__, allow_new=False)
+         tool_schema = ToolSchema.from_tool(tool)
+         tool_schema.id = tool_file_id
+         return tool_schema
+
+     def _convert_block_to_schema(self, block) -> BlockSchema:
+         """Convert Block to BlockSchema with ID remapping"""
+         block_file_id = self._map_db_to_file_id(block.id, BlockSchema.__id_prefix__, allow_new=False)
+         block_schema = BlockSchema.from_block(block)
+         block_schema.id = block_file_id
+         return block_schema
+
+     def _convert_source_to_schema(self, source) -> SourceSchema:
+         """Convert Source to SourceSchema with ID remapping"""
+         source_file_id = self._map_db_to_file_id(source.id, SourceSchema.__id_prefix__, allow_new=False)
+         source_schema = SourceSchema.from_source(source)
+         source_schema.id = source_file_id
+         return source_schema
+
+     def _convert_file_to_schema(self, file_metadata) -> FileSchema:
+         """Convert FileMetadata to FileSchema with ID remapping"""
+         file_file_id = self._map_db_to_file_id(file_metadata.id, FileSchema.__id_prefix__, allow_new=False)
+         file_schema = FileSchema.from_file_metadata(file_metadata)
+         file_schema.id = file_file_id
+         file_schema.source_id = self._map_db_to_file_id(file_metadata.source_id, SourceSchema.__id_prefix__, allow_new=False)
+         return file_schema
+
+     async def export(self, agent_ids: List[str], actor: User) -> AgentFileSchema:
+         """
+         Export agents and their related entities to AgentFileSchema format.
+
+         Args:
+             agent_ids: List of agent UUIDs to export
+
+         Returns:
+             AgentFileSchema with all related entities
+
+         Raises:
+             AgentFileExportError: If export fails
+         """
+         try:
+             self._reset_state()
+
+             agent_states = await self.agent_manager.get_agents_by_ids_async(agent_ids=agent_ids, actor=actor)
+
+             # Validate that all requested agents were found
+             if len(agent_states) != len(agent_ids):
+                 found_ids = {agent.id for agent in agent_states}
+                 missing_ids = [agent_id for agent_id in agent_ids if agent_id not in found_ids]
+                 raise AgentFileExportError(f"The following agent IDs were not found: {missing_ids}")
+
+             # cache for file-agent relationships to avoid duplicate queries
+             files_agents_cache = {}  # Maps agent_id to list of file_agent relationships
+
+             # Extract unique entities across all agents
+             tool_set = self._extract_unique_tools(agent_states)
+             block_set = self._extract_unique_blocks(agent_states)
+
+             # Extract sources and files from agent states BEFORE conversion (with caching)
+             source_set, file_set = await self._extract_unique_sources_and_files_from_agents(agent_states, actor, files_agents_cache)
+
+             # Convert to schemas with ID remapping (reusing cached file-agent data)
+             agent_schemas = [
+                 await self._convert_agent_state_to_schema(agent_state, actor=actor, files_agents_cache=files_agents_cache)
+                 for agent_state in agent_states
+             ]
+             tool_schemas = [self._convert_tool_to_schema(tool) for tool in tool_set]
+             block_schemas = [self._convert_block_to_schema(block) for block in block_set]
+             source_schemas = [self._convert_source_to_schema(source) for source in source_set]
+             file_schemas = [self._convert_file_to_schema(file_metadata) for file_metadata in file_set]
+
+             logger.info(f"Exporting {len(agent_ids)} agents to agent file format")
+
+             # Return AgentFileSchema with converted entities
+             return AgentFileSchema(
+                 agents=agent_schemas,
+                 groups=[],  # TODO: Extract and convert groups
+                 blocks=block_schemas,
+                 files=file_schemas,
+                 sources=source_schemas,
+                 tools=tool_schemas,
+                 # mcp_servers=[],  # TODO: Extract and convert MCP servers
+                 metadata={"revision_id": await get_latest_alembic_revision()},
+                 created_at=datetime.now(timezone.utc),
+             )
+
+         except Exception as e:
+             logger.error(f"Failed to export agent file: {e}")
+             raise AgentFileExportError(f"Export failed: {e}") from e
+
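A minimal usage sketch for export (not part of the diff; the manager wiring, agent IDs, and actor are hypothetical, and it assumes AgentFileSchema is a pydantic v2 model like the other letta schemas):

    async def export_to_json(mgr: AgentSerializationManager, actor: User) -> str:
        # Export two agents plus their deduplicated tools, blocks, sources,
        # files, and messages into one self-contained agent file.
        agent_file = await mgr.export(agent_ids=["agent-uuid-1", "agent-uuid-2"], actor=actor)
        return agent_file.model_dump_json(indent=2)
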
+     async def import_file(self, schema: AgentFileSchema, actor: User, dry_run: bool = False) -> ImportResult:
+         """
+         Import AgentFileSchema into the database.
+
+         Args:
+             schema: The agent file schema to import
+             dry_run: If True, validate but don't commit changes
+
+         Returns:
+             ImportResult with success status and details
+
+         Raises:
+             AgentFileImportError: If import fails
+         """
+         try:
+             self._reset_state()
+
+             if dry_run:
+                 logger.info("Starting dry run import validation")
+             else:
+                 logger.info("Starting agent file import")
+
+             # Validate schema first
+             self._validate_schema(schema)
+
+             if dry_run:
+                 return ImportResult(
+                     success=True,
+                     message="Dry run validation passed",
+                     imported_count=0,
+                 )
+
+             # Import in dependency order
+             imported_count = 0
+             file_to_db_ids = {}  # Maps file IDs to new database IDs
+             # in-memory cache for file metadata to avoid repeated db calls
+             file_metadata_cache = {}  # Maps database file ID to FileMetadata
+
+             # 1. Create tools first (no dependencies) - using bulk upsert for efficiency
+             if schema.tools:
+                 # convert tool schemas to pydantic tools
+                 pydantic_tools = []
+                 for tool_schema in schema.tools:
+                     pydantic_tools.append(Tool(**tool_schema.model_dump(exclude={"id"})))
+
+                 # bulk upsert all tools at once
+                 created_tools = await self.tool_manager.bulk_upsert_tools_async(pydantic_tools, actor)
+
+                 # map file ids to database ids
+                 # note: tools are matched by name during upsert, so we need to match by name here too
+                 created_tools_by_name = {tool.name: tool for tool in created_tools}
+                 for tool_schema in schema.tools:
+                     created_tool = created_tools_by_name.get(tool_schema.name)
+                     if created_tool:
+                         file_to_db_ids[tool_schema.id] = created_tool.id
+                         imported_count += 1
+                     else:
+                         logger.warning(f"Tool {tool_schema.name} was not created during bulk upsert")
+
+             # 2. Create blocks (no dependencies) - using batch create for efficiency
+             if schema.blocks:
+                 # convert block schemas to pydantic blocks (excluding IDs to create new blocks)
+                 pydantic_blocks = []
+                 for block_schema in schema.blocks:
+                     pydantic_blocks.append(Block(**block_schema.model_dump(exclude={"id"})))
+
+                 # batch create all blocks at once
+                 created_blocks = await self.block_manager.batch_create_blocks_async(pydantic_blocks, actor)
+
+                 # map file ids to database ids
+                 for block_schema, created_block in zip(schema.blocks, created_blocks):
+                     file_to_db_ids[block_schema.id] = created_block.id
+                     imported_count += 1
+
+             # 3. Create sources (no dependencies) - using bulk upsert for efficiency
+             if schema.sources:
+                 # convert source schemas to pydantic sources
+                 pydantic_sources = []
+                 for source_schema in schema.sources:
+                     source_data = source_schema.model_dump(exclude={"id", "embedding", "embedding_chunk_size"})
+                     pydantic_sources.append(Source(**source_data))
+
+                 # bulk upsert all sources at once
+                 created_sources = await self.source_manager.bulk_upsert_sources_async(pydantic_sources, actor)
+
+                 # map file ids to database ids
+                 # note: sources are matched by name during upsert, so we need to match by name here too
+                 created_sources_by_name = {source.name: source for source in created_sources}
+                 for source_schema in schema.sources:
+                     created_source = created_sources_by_name.get(source_schema.name)
+                     if created_source:
+                         file_to_db_ids[source_schema.id] = created_source.id
+                         imported_count += 1
+                     else:
+                         logger.warning(f"Source {source_schema.name} was not created during bulk upsert")
+
+             # 4. Create files (depends on sources)
+             for file_schema in schema.files:
+                 # Convert FileSchema back to FileMetadata
+                 file_data = file_schema.model_dump(exclude={"id", "content"})
+                 # Remap source_id from file ID to database ID
+                 file_data["source_id"] = file_to_db_ids[file_schema.source_id]
+                 # Set processing status to PARSING since we have parsed content but need to re-embed
+                 file_data["processing_status"] = FileProcessingStatus.PARSING
+                 file_data["error_message"] = None
+                 file_data["total_chunks"] = None
+                 file_data["chunks_embedded"] = None
+                 file_metadata = FileMetadata(**file_data)
+                 created_file = await self.file_manager.create_file(file_metadata, actor, text=file_schema.content)
+                 file_to_db_ids[file_schema.id] = created_file.id
+                 imported_count += 1
+
+             # 5. Process files for chunking/embedding (depends on files and sources)
+             if should_use_pinecone():
+                 embedder = PineconeEmbedder(embedding_config=schema.agents[0].embedding_config)
+             else:
+                 embedder = OpenAIEmbedder(embedding_config=schema.agents[0].embedding_config)
+             file_processor = FileProcessor(
+                 file_parser=self.file_parser,
+                 embedder=embedder,
+                 actor=actor,
+                 using_pinecone=self.using_pinecone,
+             )
+
+             for file_schema in schema.files:
+                 if file_schema.content:  # Only process files with content
+                     file_db_id = file_to_db_ids[file_schema.id]
+                     source_db_id = file_to_db_ids[file_schema.source_id]
+
+                     # Get the created file metadata (with caching)
+                     if file_db_id not in file_metadata_cache:
+                         file_metadata_cache[file_db_id] = await self.file_manager.get_file_by_id(file_db_id, actor)
+                     file_metadata = file_metadata_cache[file_db_id]
+
+                     # Save the db call of fetching content again
+                     file_metadata.content = file_schema.content
+
+                     # Process the file for chunking/embedding
+                     passages = await file_processor.process_imported_file(file_metadata=file_metadata, source_id=source_db_id)
+                     imported_count += len(passages)
+
+             # 6. Create agents with empty message history
+             for agent_schema in schema.agents:
+                 # Convert AgentSchema back to CreateAgent, remapping tool/block IDs
+                 agent_data = agent_schema.model_dump(exclude={"id", "in_context_message_ids", "messages"})
+
+                 # Remap tool_ids from file IDs to database IDs
+                 if agent_data.get("tool_ids"):
+                     agent_data["tool_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["tool_ids"]]
+
+                 # Remap block_ids from file IDs to database IDs
+                 if agent_data.get("block_ids"):
+                     agent_data["block_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["block_ids"]]
+
+                 agent_create = CreateAgent(**agent_data)
+                 created_agent = await self.agent_manager.create_agent_async(agent_create, actor, _init_with_no_messages=True)
+                 file_to_db_ids[agent_schema.id] = created_agent.id
+                 imported_count += 1
+
+             # 7. Create messages and update agent message_ids
+             for agent_schema in schema.agents:
+                 agent_db_id = file_to_db_ids[agent_schema.id]
+                 message_file_to_db_ids = {}
+
+                 # Create messages for this agent
+                 messages = []
+                 for message_schema in agent_schema.messages:
+                     # Convert MessageSchema back to Message, setting agent_id to new DB ID
+                     message_data = message_schema.model_dump(exclude={"id"})
+                     message_data["agent_id"] = agent_db_id  # Remap agent_id to new database ID
+                     message_obj = Message(**message_data)
+                     messages.append(message_obj)
+                     # Map file ID to the generated database ID immediately
+                     message_file_to_db_ids[message_schema.id] = message_obj.id
+
+                 created_messages = await self.message_manager.create_many_messages_async(pydantic_msgs=messages, actor=actor)
+                 imported_count += len(created_messages)
+
+                 # Remap in_context_message_ids from file IDs to database IDs
+                 in_context_db_ids = [message_file_to_db_ids[message_schema_id] for message_schema_id in agent_schema.in_context_message_ids]
+
+                 # Update agent with the correct message_ids
+                 await self.agent_manager.update_message_ids_async(agent_id=agent_db_id, message_ids=in_context_db_ids, actor=actor)
+
+             # 8. Create file-agent relationships (depends on agents and files)
+             for agent_schema in schema.agents:
+                 if agent_schema.files_agents:
+                     agent_db_id = file_to_db_ids[agent_schema.id]
+
+                     # Prepare files for bulk attachment
+                     files_for_agent = []
+                     visible_content_map = {}
+
+                     for file_agent_schema in agent_schema.files_agents:
+                         file_db_id = file_to_db_ids[file_agent_schema.file_id]
+
+                         # Use cached file metadata if available
+                         if file_db_id not in file_metadata_cache:
+                             file_metadata_cache[file_db_id] = await self.file_manager.get_file_by_id(file_db_id, actor)
+                         file_metadata = file_metadata_cache[file_db_id]
+                         files_for_agent.append(file_metadata)
+
+                         if file_agent_schema.visible_content:
+                             visible_content_map[file_db_id] = file_agent_schema.visible_content
+
+                     # Bulk attach files to agent
+                     await self.file_agent_manager.attach_files_bulk(
+                         agent_id=agent_db_id,
+                         files_metadata=files_for_agent,
+                         visible_content_map=visible_content_map,
+                         actor=actor,
+                         max_files_open=agent_schema.max_files_open,
+                     )
+                     imported_count += len(files_for_agent)
+
+             return ImportResult(
+                 success=True,
+                 message=f"Import completed successfully. Imported {imported_count} entities.",
+                 imported_count=imported_count,
+                 id_mappings=file_to_db_ids,
+             )
+
+         except Exception as e:
+             logger.exception(f"Failed to import agent file: {e}")
+             raise AgentFileImportError(f"Import failed: {e}") from e
+
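A minimal sketch of the two-phase import this method enables (not part of the diff; the wiring is hypothetical). Because dry_run=True only runs _validate_schema and returns early, a caller can validate a file before committing anything:

    async def validated_import(mgr: AgentSerializationManager, schema: AgentFileSchema, actor: User) -> ImportResult:
        # Phase 1: validation only; raises AgentFileImportError on bad schemas.
        await mgr.import_file(schema, actor, dry_run=True)
        # Phase 2: the real import, which remaps file IDs to fresh database IDs
        # and returns that mapping in ImportResult.id_mappings.
        return await mgr.import_file(schema, actor)
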
+     def _validate_id_format(self, schema: AgentFileSchema) -> List[str]:
+         """Validate that all IDs follow the expected format"""
+         errors = []
+
+         # Define entity types and their expected prefixes
+         entity_checks = [
+             (schema.agents, AgentSchema.__id_prefix__),
+             (schema.groups, GroupSchema.__id_prefix__),
+             (schema.blocks, BlockSchema.__id_prefix__),
+             (schema.files, FileSchema.__id_prefix__),
+             (schema.sources, SourceSchema.__id_prefix__),
+             (schema.tools, ToolSchema.__id_prefix__),
+         ]
+
+         for entities, expected_prefix in entity_checks:
+             for entity in entities:
+                 if not entity.id.startswith(f"{expected_prefix}-"):
+                     errors.append(f"Invalid ID format: {entity.id} should start with '{expected_prefix}-'")
+                 else:
+                     # Check that the suffix is a valid integer
+                     try:
+                         suffix = entity.id[len(expected_prefix) + 1 :]
+                         int(suffix)
+                     except ValueError:
+                         errors.append(f"Invalid ID format: {entity.id} should have integer suffix")
+
+         # Also check message IDs within agents
+         for agent in schema.agents:
+             for message in agent.messages:
+                 if not message.id.startswith(f"{MessageSchema.__id_prefix__}-"):
+                     errors.append(f"Invalid message ID format: {message.id} should start with '{MessageSchema.__id_prefix__}-'")
+                 else:
+                     # Check that the suffix is a valid integer
+                     try:
+                         suffix = message.id[len(MessageSchema.__id_prefix__) + 1 :]
+                         int(suffix)
+                     except ValueError:
+                         errors.append(f"Invalid message ID format: {message.id} should have integer suffix")
+
+         return errors
+
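For reference, a few IDs and the verdicts the format check above would produce (a sketch, assuming the agent prefix is "agent"):

    "agent-0"    # valid: known prefix, integer suffix
    "agent-007"  # valid: "007" parses as an integer
    "agent-x1"   # invalid: non-integer suffix
    "agnt-0"     # invalid: missing the "agent-" prefix
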
+     def _validate_duplicate_ids(self, schema: AgentFileSchema) -> List[str]:
+         """Validate that there are no duplicate IDs within or across entity types"""
+         errors = []
+         all_ids = set()
+
+         # Check each entity type for internal duplicates and collect all IDs
+         entity_collections = [
+             ("agents", schema.agents),
+             ("groups", schema.groups),
+             ("blocks", schema.blocks),
+             ("files", schema.files),
+             ("sources", schema.sources),
+             ("tools", schema.tools),
+         ]
+
+         for entity_type, entities in entity_collections:
+             entity_ids = [entity.id for entity in entities]
+
+             # Check for duplicates within this entity type
+             seen = set()
+             duplicates = set()
+             for entity_id in entity_ids:
+                 if entity_id in seen:
+                     duplicates.add(entity_id)
+                 else:
+                     seen.add(entity_id)
+
+             if duplicates:
+                 errors.append(f"Duplicate {entity_type} IDs found: {duplicates}")
+
+             # Check for duplicates across all entity types
+             for entity_id in entity_ids:
+                 if entity_id in all_ids:
+                     errors.append(f"Duplicate ID across entity types: {entity_id}")
+                 all_ids.add(entity_id)
+
+         # Also check message IDs within agents
+         for agent in schema.agents:
+             message_ids = [msg.id for msg in agent.messages]
+
+             # Check for duplicates within agent messages
+             seen = set()
+             duplicates = set()
+             for message_id in message_ids:
+                 if message_id in seen:
+                     duplicates.add(message_id)
+                 else:
+                     seen.add(message_id)
+
+             if duplicates:
+                 errors.append(f"Duplicate message IDs in agent {agent.id}: {duplicates}")
+
+             # Check for duplicates across all entity types
+             for message_id in message_ids:
+                 if message_id in all_ids:
+                     errors.append(f"Duplicate ID across entity types: {message_id}")
+                 all_ids.add(message_id)
+
+         return errors
+
+     def _validate_file_source_references(self, schema: AgentFileSchema) -> List[str]:
+         """Validate that all file source_id references exist"""
+         errors = []
+         source_ids = {source.id for source in schema.sources}
+
+         for file in schema.files:
+             if file.source_id not in source_ids:
+                 errors.append(f"File {file.id} references non-existent source {file.source_id}")
+
+         return errors
+
+     def _validate_file_agent_references(self, schema: AgentFileSchema) -> List[str]:
+         """Validate that all file-agent relationships reference existing entities"""
+         errors = []
+         file_ids = {file.id for file in schema.files}
+         source_ids = {source.id for source in schema.sources}
+
+         for agent in schema.agents:
+             for file_agent in agent.files_agents:
+                 if file_agent.file_id not in file_ids:
+                     errors.append(f"File-agent relationship references non-existent file {file_agent.file_id}")
+                 if file_agent.source_id not in source_ids:
+                     errors.append(f"File-agent relationship references non-existent source {file_agent.source_id}")
+                 if file_agent.agent_id != agent.id:
+                     errors.append(f"File-agent relationship has mismatched agent_id {file_agent.agent_id} vs {agent.id}")
+
+         return errors
+
+     def _validate_schema(self, schema: AgentFileSchema):
+         """
+         Validate the agent file schema for consistency and referential integrity.
+
+         Args:
+             schema: The schema to validate
+
+         Raises:
+             AgentFileImportError: If validation fails
+         """
+         errors = []
+
+         # 1. ID Format Validation
+         errors.extend(self._validate_id_format(schema))
+
+         # 2. Duplicate ID Detection
+         errors.extend(self._validate_duplicate_ids(schema))
+
+         # 3. File Source Reference Validation
+         errors.extend(self._validate_file_source_references(schema))
+
+         # 4. File-Agent Reference Validation
+         errors.extend(self._validate_file_agent_references(schema))
+
+         if errors:
+             raise AgentFileImportError(f"Schema validation failed: {'; '.join(errors)}")
+
+         logger.info("Schema validation passed")
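Putting the pieces together, a hedged end-to-end roundtrip (not part of the diff; manager and actor construction are elided, and it assumes AgentFileSchema is a pydantic v2 model):

    async def roundtrip(mgr: AgentSerializationManager, actor: User, agent_id: str) -> ImportResult:
        # Export one agent to the portable agent-file format...
        agent_file = await mgr.export(agent_ids=[agent_id], actor=actor)
        # ...serialize and re-parse it as a portability check...
        restored = AgentFileSchema.model_validate_json(agent_file.model_dump_json())
        # ...then import, creating new database entities with fresh IDs.
        return await mgr.import_file(restored, actor=actor)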